diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
index 3dfeeaff60..8bd74fc808 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
@@ -20,7 +20,6 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
@@ -47,21 +46,8 @@
   // Priority queue that holds occurred keys
   private transient PriorityQueue<KeyWrapper> priorityQueue;
 
-  // Fast key wrapper in input format for fast comparison
   private transient KeyWrapper keyWrapper;
 
-  // Standard key wrapper in standard format for output
-  private transient KeyWrapper standardKeyWrapper;
-
-  // Maximum number of rows
-  private transient int rowLimit;
-
-  // Current number of rows
-  private transient int rowSize;
-
-  // Rows
-  private transient Object[] rows;
-
   /** Kryo ctor. */
   public TopNKeyOperator() {
     super();
@@ -96,14 +82,16 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     this.topN = conf.getTopN();
 
-    String columnSortOrder = conf.getColumnSortOrder();
-    boolean[] columnSortOrderIsDesc = new boolean[columnSortOrder.length()];
+    final String columnSortOrder = conf.getColumnSortOrder();
+    final boolean[] columnSortOrderIsDesc = new boolean[columnSortOrder.length()];
     for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
       columnSortOrderIsDesc[i] = (columnSortOrder.charAt(i) == '-');
     }
 
-    ObjectInspector rowInspector = inputObjInspectors[0];
-    outputObjInspector = ObjectInspectorUtils.getStandardObjectInspector(rowInspector);
+    final ObjectInspector rowInspector = inputObjInspectors[0];
+    outputObjInspector = rowInspector;
+    final ObjectInspector standardObjInspector =
+        ObjectInspectorUtils.getStandardObjectInspector(rowInspector);
 
     // init keyFields
     int numKeys = conf.getKeyColumns().size();
@@ -117,20 +105,15 @@ protected void initializeOp(Configuration hconf) throws HiveException {
       keyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf);
       keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
       standardKeyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf);
-      standardKeyObjectInspectors[i] = standardKeyFields[i].initialize(outputObjInspector);
+      standardKeyObjectInspectors[i] = standardKeyFields[i].initialize(standardObjInspector);
     }
 
     priorityQueue = new PriorityQueue<>(topN + 1, new TopNKeyOperator.KeyWrapperComparator(
        standardKeyObjectInspectors, standardKeyObjectInspectors, columnSortOrderIsDesc));
 
-    keyWrapper = new KeyWrapperFactory(keyFields, keyObjectInspectors,
-        standardKeyObjectInspectors).getKeyWrapper();
-    standardKeyWrapper = new KeyWrapperFactory(standardKeyFields, standardKeyObjectInspectors,
-        standardKeyObjectInspectors).getKeyWrapper();
-
-    rowLimit = VectorizedRowBatch.DEFAULT_SIZE;
-    rows = new Object[rowLimit];
-    rowSize = 0;
+    final KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors,
+        standardKeyObjectInspectors);
+    keyWrapper = keyWrapperFactory.getKeyWrapper();
   }
 
   @Override
@@ -144,33 +127,13 @@ public void process(Object row, int tag) throws HiveException {
     if (priorityQueue.size() > topN) {
       priorityQueue.poll();
     }
-
-    rows[rowSize] = ObjectInspectorUtils.copyToStandardObject(row, inputObjInspectors[0]);
-    rowSize++;
-
-    if (rowSize % rowLimit == 0) {
-      processRows();
-    }
-  }
-
-  private void processRows() throws HiveException {
-    for (int i = 0; i < rowSize; i++) {
-      Object row = rows[i];
-
-      standardKeyWrapper.getNewKey(row, outputObjInspector);
-      standardKeyWrapper.setHashKey();
-
-      if (priorityQueue.contains(standardKeyWrapper)) {
-        forward(row, outputObjInspector);
-      }
+    if (priorityQueue.contains(keyWrapper)) {
+      forward(row, outputObjInspector);
     }
-    priorityQueue.clear();
-    rowSize = 0;
   }
 
   @Override
   protected final void closeOp(boolean abort) throws HiveException {
-    processRows();
     super.closeOp(abort);
   }
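With the row-buffering path removed, the operator boils down to a single bounded priority queue: offer each incoming key, evict the worst key once the queue holds more than topN entries, and forward the row only if its key survived. A minimal, self-contained sketch of that idea, using plain ints in place of Hive's KeyWrapper (all names below are illustrative, not Hive API):

```java
// Sketch only: keep at most `topN` distinct keys in a bounded priority queue
// ordered so that the *worst* surviving key sits at the head, and forward a
// row only if its key is still among the survivors.
import java.util.Comparator;
import java.util.PriorityQueue;

public class TopNKeyFilterSketch {
  private final int topN;
  // Reversed order: for an ascending sort, the largest (worst) key is at the head.
  private final PriorityQueue<Integer> queue;

  public TopNKeyFilterSketch(int topN) {
    this.topN = topN;
    this.queue = new PriorityQueue<>(topN + 1, Comparator.<Integer>naturalOrder().reversed());
  }

  /** Returns true if a row with this key can still be in the top N. */
  public boolean offerAndTest(int key) {
    if (!queue.contains(key)) {
      queue.offer(key);
    }
    if (queue.size() > topN) {
      queue.poll();             // evict the worst key
    }
    return queue.contains(key); // forward only rows whose key survived
  }

  public static void main(String[] args) {
    TopNKeyFilterSketch filter = new TopNKeyFilterSketch(2);
    for (int key : new int[] {5, 1, 7, 1, 3, 9}) {
      System.out.println(key + " -> " + (filter.offerAndTest(key) ? "forward" : "drop"));
    }
  }
}
```

Note the filter is streaming and may over-forward early rows (here, 5 is forwarded before smaller keys arrive); the downstream sort and limit still perform the exact cut, so the operator only has to guarantee it never drops a row that could be in the top N.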
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
index 6f29f8877c..a81cbc5a5a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
@@ -21,6 +21,7 @@
 import com.google.common.base.Joiner;
 import com.google.common.primitives.Ints;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TopNKeyOperator;
@@ -160,7 +161,11 @@ public void process(Object data, int tag) throws HiveException {
       binarySortableSerDe = new BinarySortableSerDe();
       Properties properties = new Properties();
       Joiner joiner = Joiner.on(',');
-      properties.setProperty(serdeConstants.LIST_COLUMNS, joiner.join(conf.getKeyColumnNames()));
+      String[] names = new String[keyTypeInfos.length];
+      for (int i = 0; i < keyTypeInfos.length; i++) {
+        names[i] = HiveConf.getColumnInternalName(i);
+      }
+      properties.setProperty(serdeConstants.LIST_COLUMNS, joiner.join(names));
       properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, joiner.join(keyTypeInfos));
       properties.setProperty(serdeConstants.SERIALIZATION_SORT_ORDER, conf.getColumnSortOrder());
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java
index 721a9b9998..6d25a208b8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java
@@ -37,6 +37,7 @@
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Stack;
 
@@ -97,13 +98,25 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
     // Insert a new top n key operator between the group by operator and its parent
     TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(),
         groupByKeyColumns);
-    Operator<? extends OperatorDesc> newOperator = OperatorFactory.getAndMakeChild(
-        groupByOperator.getCompilationOpContext(), (OperatorDesc) topNKeyDesc,
-        new RowSchema(groupByOperator.getSchema()), groupByOperator.getParentOperators());
-    newOperator.getChildOperators().add(groupByOperator);
-    groupByOperator.getParentOperators().add(newOperator);
-    parentOperator.removeChild(groupByOperator);
-
+    copyDown(groupByOperator, topNKeyDesc);
     return null;
   }
+
+  static TopNKeyOperator copyDown(Operator<? extends OperatorDesc> child, OperatorDesc operatorDesc) {
+    final List<Operator<? extends OperatorDesc>> parents = child.getParentOperators();
+
+    final Operator<? extends OperatorDesc> newOperator =
+        OperatorFactory.getAndMakeChild(
+            child.getCompilationOpContext(), operatorDesc,
+            new RowSchema(parents.get(0).getSchema()),
+            child.getParentOperators());
+    newOperator.setParentOperators(new ArrayList<>(parents));
+    newOperator.setChildOperators(new ArrayList<>(Collections.singletonList(child)));
+
+    for (Operator<? extends OperatorDesc> parent : parents) {
+      parent.removeChild(child);
+    }
+    child.setParentOperators(new ArrayList<>(Collections.singletonList(newOperator)));
+
+    return (TopNKeyOperator) newOperator;
+  }
 }
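copyDown() is factored out of the processor so the new pushdown pass can reuse it: it creates a fresh TopNKey above `child` and rewires both edge directions for every parent. The same splice, reduced to a toy node class (hypothetical Node type, not Hive's Operator API):

```java
// Toy illustration of the copyDown() rewiring: splice a new node between
// `child` and all of its parents, keeping both edge directions consistent.
import java.util.ArrayList;
import java.util.List;

class Node {
  final String name;
  final List<Node> parents = new ArrayList<>();
  final List<Node> children = new ArrayList<>();
  Node(String name) { this.name = name; }

  static Node spliceAbove(Node child, Node inserted) {
    for (Node parent : new ArrayList<>(child.parents)) {
      parent.children.set(parent.children.indexOf(child), inserted); // parent -> inserted
      inserted.parents.add(parent);
    }
    child.parents.clear();
    child.parents.add(inserted);  // child's only parent is now `inserted`
    inserted.children.add(child); // inserted -> child
    return inserted;
  }

  public static void main(String[] args) {
    Node rs = new Node("RS"), gby = new Node("GBY");
    rs.children.add(gby); gby.parents.add(rs);
    spliceAbove(gby, new Node("TNK"));
    System.out.println(gby.parents.get(0).name); // TNK
    System.out.println(rs.children.get(0).name); // TNK
  }
}
```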
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdownProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdownProcessor.java
new file mode 100644
index 0000000000..460e8c79ac
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdownProcessor.java
@@ -0,0 +1,400 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TopNKeyOperator;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import static org.apache.hadoop.hive.ql.optimizer.TopNKeyProcessor.copyDown;
+
+public class TopNKeyPushdownProcessor implements NodeProcessor {
+  private static final Logger LOG = LoggerFactory.getLogger(TopNKeyPushdownProcessor.class);
+
+  @Override
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+      Object... nodeOutputs) throws SemanticException {
+    pushdown((TopNKeyOperator) nd);
+    return null;
+  }
+
+  private void pushdown(TopNKeyOperator topNKey) throws SemanticException {
+
+    final Operator<? extends OperatorDesc> parent =
+        topNKey.getParentOperators().get(0);
+
+    switch (parent.getType()) {
+      case SELECT:
+        pushdownThroughSelect(topNKey);
+        break;
+
+      case FORWARD:
+        moveDown(topNKey);
+        pushdown(topNKey);
+        break;
+
+      case GROUPBY:
+        pushdownThroughGroupBy(topNKey);
+        break;
+
+      case REDUCESINK:
+        pushdownThroughReduceSink(topNKey);
+        break;
+
+      case MERGEJOIN:
+      case JOIN:
+      {
+        final CommonJoinOperator<? extends JoinDesc> join =
+            (CommonJoinOperator<? extends JoinDesc>) parent;
+        final JoinCondDesc[] joinConds = join.getConf().getConds();
+        final JoinCondDesc firstJoinCond = joinConds[0];
+        for (JoinCondDesc joinCond : joinConds) {
+          if (!firstJoinCond.equals(joinCond)) {
+            return;
+          }
+        }
+        switch (firstJoinCond.getType()) {
+          case JoinDesc.FULL_OUTER_JOIN:
+            pushdownThroughFullOuterJoin(topNKey);
+            break;
+
+          case JoinDesc.LEFT_OUTER_JOIN:
+            pushdownThroughLeftOuterJoin(topNKey);
+            break;
+
+          case JoinDesc.RIGHT_OUTER_JOIN:
+            pushdownThroughRightOuterJoin(topNKey);
+            break;
+
+          case JoinDesc.INNER_JOIN:
+            pushdownThroughInnerJoin(topNKey);
+            break;
+        }
+      }
+      break;
+
+      case TOPNKEY:
+        if (hasSameTopNKeyDesc(parent, topNKey.getConf())) {
+          parent.removeChildAndAdoptItsChildren(topNKey);
+        }
+        break;
+    }
+  }
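The dispatch above is recursive: each successful move calls pushdown() again from the TopNKey's new position, so the operator keeps descending until it reaches an operator it cannot pass, or merges into an identical TopNKey below it. A compact model of that walk, under the simplifying assumption of a single-parent operator chain (hypothetical types, not Hive's):

```java
// Keep moving the top-n-key marker down while the operator above it is
// transparent to top-n keys; stop at the first blocking operator.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PushdownWalkSketch {
  enum Kind { TABLESCAN, JOIN, SELECT, FORWARD }

  // Returns the index where the TopNKey marker settles, starting above `from`.
  static int pushdown(List<Kind> plan, int from) {
    while (from > 0) {
      Kind parent = plan.get(from - 1);
      if (parent == Kind.SELECT || parent == Kind.FORWARD) {
        from--;  // transparent: move down and try again (the recursion in the real code)
      } else {
        break;   // joins need special handling; anything else blocks the descent
      }
    }
    return from;
  }

  public static void main(String[] args) {
    List<Kind> plan = new ArrayList<>(Arrays.asList(
        Kind.TABLESCAN, Kind.JOIN, Kind.SELECT, Kind.FORWARD));
    System.out.println(pushdown(plan, plan.size()));  // 2: stops just above the JOIN
  }
}
```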
+
+  private void pushdownThroughSelect(TopNKeyOperator topNKey) throws SemanticException {
+
+    final SelectOperator select = (SelectOperator) topNKey.getParentOperators().get(0);
+    final TopNKeyDesc topNKeyDesc = topNKey.getConf();
+
+    // Map columns
+    final List<ExprNodeDesc> mappedColumns = mapColumns(topNKeyDesc.getKeyColumns(),
+        select.getColumnExprMap());
+    if (mappedColumns.isEmpty()) {
+      return;
+    }
+
+    // Move down
+    topNKeyDesc.setColumnSortOrder(topNKeyDesc.getColumnSortOrder());
+    topNKeyDesc.setKeyColumns(mappedColumns);
+    moveDown(topNKey);
+    pushdown(topNKey);
+  }
+
+  private void pushdownThroughGroupBy(TopNKeyOperator topNKey) throws SemanticException {
+    /*
+     * Push through GroupBy. No grouping sets. If the TopNKey expression is the same as the
+     * GroupBy expression, we can push it down and remove it from above the GroupBy. If the
+     * TopNKey expression shares a common prefix with the GroupBy expression, the TopNKey could
+     * be pushed through the GroupBy using that prefix and kept above it.
+     */
+    final GroupByOperator groupBy = (GroupByOperator) topNKey.getParentOperators().get(0);
+    final GroupByDesc groupByDesc = groupBy.getConf();
+    final TopNKeyDesc topNKeyDesc = topNKey.getConf();
+
+    // Check grouping sets
+    if (groupByDesc.isGroupingSetsPresent()) {
+      return;
+    }
+
+    // Map columns
+    final List<ExprNodeDesc> mappedColumns = mapColumns(topNKeyDesc.getKeyColumns(),
+        groupByDesc.getColumnExprMap());
+    // If the TopNKey expression is the same as the GroupBy expression
+    if (!ExprNodeDescUtils.isSame(groupByDesc.getKeys(), mappedColumns)) {
+      return;
+    }
+
+    // We can push it down and remove it from above the GroupBy.
+    final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(),
+        topNKeyDesc.getColumnSortOrder(), mappedColumns);
+    groupBy.removeChildAndAdoptItsChildren(topNKey);
+    pushdown(copyDown(groupBy, newTopNKeyDesc));
+  }
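Both the Select and the GroupBy cases hinge on the same translation step: rewrite the TopNKey's key expressions into the parent's input-side expressions via the parent's columnExprMap, and give up when a key has no source expression. The mapping, modeled with plain strings instead of ExprNodeDesc (all names hypothetical):

```java
// Translate output-side column names into the parent's input-side
// expressions; only a fully mappable key list is eligible for pushdown.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ColumnMappingSketch {
  static List<String> mapColumns(List<String> keys, Map<String, String> columnExprMap) {
    List<String> mapped = new ArrayList<>();
    for (String key : keys) {
      String expr = columnExprMap.get(key);
      if (expr != null) {
        mapped.add(expr);  // like the real mapColumns, unmapped keys are silently skipped
      }
    }
    return mapped;
  }

  public static void main(String[] args) {
    Map<String, String> selectMap = new LinkedHashMap<>();
    selectMap.put("_col0", "key");
    selectMap.put("_col1", "UPPER(value)");

    // Fully mappable: eligible for pushdown through the Select.
    System.out.println(mapColumns(Arrays.asList("_col0", "_col1"), selectMap));
    // "_col9" has no source expression, so it drops out; a partially mapped
    // list must not simply replace the original TopNKey keys.
    System.out.println(mapColumns(Arrays.asList("_col0", "_col9"), selectMap));
  }
}
```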
+
+  private void pushdownThroughReduceSink(TopNKeyOperator topNKey) throws SemanticException {
+    /*
+     * Push through ReduceSink. If the TopNKey expression is the same as the ReduceSink
+     * expression and the order is the same, we can push it down and remove it from above the
+     * ReduceSink. If the TopNKey expression shares a common prefix with the ReduceSink
+     * expression, including the same order, the TopNKey could be pushed through the ReduceSink
+     * using that prefix and kept above it.
+     */
+    final ReduceSinkOperator reduceSink = (ReduceSinkOperator) topNKey.getParentOperators().get(0);
+    final ReduceSinkDesc reduceSinkDesc = reduceSink.getConf();
+    final TopNKeyDesc topNKeyDesc = topNKey.getConf();
+
+    // Check orders
+    if (!reduceSinkDesc.getOrder().equals(topNKeyDesc.getColumnSortOrder())) {
+      return;
+    }
+
+    // Map columns
+    final List<ExprNodeDesc> mappedColumns = mapColumns(topNKeyDesc.getKeyColumns(),
+        reduceSinkDesc.getColumnExprMap());
+    // If the TopNKey expression is the same as the ReduceSink expression
+    if (!ExprNodeDescUtils.isSame(reduceSinkDesc.getKeyCols(), mappedColumns)) {
+      return;
+    }
+
+    // We can push it down and remove it from above the ReduceSink.
+    final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(),
+        topNKeyDesc.getColumnSortOrder(), mappedColumns);
+    reduceSink.removeChildAndAdoptItsChildren(topNKey);
+    pushdown(copyDown(reduceSink, newTopNKeyDesc));
+  }
+
+  private void pushdownThroughFullOuterJoin(TopNKeyOperator topNKey) throws SemanticException {
+    /*
+     * Push through FOJ. Push the TopNKey expression, without the join keys, to the largest
+     * input, and keep the TopNKey on top of the FOJ.
+     */
+    final CommonJoinOperator<? extends JoinDesc> join =
+        (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0);
+    final TopNKeyDesc topNKeyDesc = topNKey.getConf();
+    final ReduceSinkOperator leftInput = (ReduceSinkOperator) join.getParentOperators().get(0);
+    final ReduceSinkOperator rightInput = (ReduceSinkOperator) join.getParentOperators().get(1);
+
+    // Check null orders
+    if (!checkNullOrder(leftInput.getConf())) {
+      return;
+    }
+    if (!checkNullOrder(rightInput.getConf())) {
+      return;
+    }
+
+    // Map columns
+    final ReduceSinkOperator joinInput;
+    final List<ExprNodeDesc> mappedColumns;
+    if (leftInput.getStatistics().getDataSize() > rightInput.getStatistics().getDataSize()) {
+      joinInput = rightInput;
+      mappedColumns = new ArrayList<>(joinInput.getConf().getKeyCols());
+      for (JoinCondDesc cond : join.getConf().getConds()) {
+        mappedColumns.remove(cond.getRight());
+      }
+    } else {
+      joinInput = leftInput;
+      mappedColumns = new ArrayList<>(joinInput.getConf().getKeyCols());
+      for (JoinCondDesc cond : join.getConf().getConds()) {
+        mappedColumns.remove(cond.getLeft());
+      }
+    }
+    if (mappedColumns.isEmpty()) {
+      return;
+    }
+
+    // Copy down
+    final String mappedOrder = mapOrder(topNKeyDesc.getColumnSortOrder(),
+        joinInput.getConf().getKeyCols(), mappedColumns);
+    final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), mappedOrder,
+        mappedColumns);
+    pushdown(copyDown(joinInput, newTopNKeyDesc));
+  }
+
+  private void pushdownThroughLeftOuterJoin(TopNKeyOperator topNKey) throws SemanticException {
+    pushdownThroughLeftOrRightOuterJoin(topNKey, 0);
+  }
+
+  private void pushdownThroughRightOuterJoin(TopNKeyOperator topNKey) throws SemanticException {
+    pushdownThroughLeftOrRightOuterJoin(topNKey, 1);
+  }
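Every outer-join case first runs checkNullOrder (defined later in this file) on the join input, because outer joins manufacture null-keyed rows: pre-filtering below the join is only safe when nulls sort first, so those manufactured rows cannot outrank the survivors. The shape of that safety predicate, simplified (a sketch mirroring checkNullOrder, not a Hive API):

```java
// Pushing a top-n filter below an outer join is only attempted when nulls
// sort first on every key column of the join input.
public class NullOrderSketch {
  static boolean nullsSafe(String order, String nullOrder) {
    if (nullOrder == null) {
      // Without an explicit null order, ascending keys imply nulls-first in Hive.
      return order.chars().allMatch(c -> c == '+');
    }
    return nullOrder.chars().allMatch(c -> c == 'a');
  }

  public static void main(String[] args) {
    System.out.println(nullsSafe("++", null)); // true: all ascending
    System.out.println(nullsSafe("+-", null)); // false: a descending key
    System.out.println(nullsSafe("--", "aa")); // true: explicit nulls-first
    System.out.println(nullsSafe("++", "az")); // false: nulls-last on one key
  }
}
```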
+
+  private void pushdownThroughLeftOrRightOuterJoin(TopNKeyOperator topNKey, int position)
+      throws SemanticException {
+    /*
+     * Push through LOJ. If the TopNKey expression refers only to expressions from the left
+     * input, push it with rewritten expressions and remove it from the top of the LOJ. If the
+     * TopNKey expression has a prefix that refers to expressions from the left input, push that
+     * prefix with rewritten expressions and keep the TopNKey on top of the LOJ.
+     */
+    final TopNKeyDesc topNKeyDesc = topNKey.getConf();
+    final CommonJoinOperator<? extends JoinDesc> join =
+        (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0);
+    final List<Operator<? extends OperatorDesc>> joinInputs = join.getParentOperators();
+    final ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) joinInputs.get(position);
+    final ReduceSinkDesc reduceSinkDesc = reduceSinkOperator.getConf();
+
+    // Check null order
+    if (!checkNullOrder(reduceSinkDesc)) {
+      return;
+    }
+
+    // Map columns
+    final List<ExprNodeDesc> mappedColumns = mapColumns(mapColumns(topNKeyDesc.getKeyColumns(),
+        join.getColumnExprMap()), reduceSinkOperator.getColumnExprMap());
+    if (mappedColumns.isEmpty()) {
+      return;
+    }
+
+    // Copy down
+    final String mappedOrder = mapOrder(topNKeyDesc.getColumnSortOrder(),
+        reduceSinkDesc.getKeyCols(), mappedColumns);
+    final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), mappedOrder,
+        mappedColumns);
+    pushdown(copyDown(reduceSinkOperator, newTopNKeyDesc));
+
+    // If all columns are mapped, remove it from the top
+    if (topNKeyDesc.getKeyColumns().size() == mappedColumns.size()) {
+      join.removeChildAndAdoptItsChildren(topNKey);
+    }
+  }
+
+  private void pushdownThroughInnerJoin(TopNKeyOperator topNKey) throws SemanticException {
+
+    final CommonJoinOperator<? extends JoinDesc> join =
+        (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0);
+    final List<Operator<? extends OperatorDesc>> joinInputs = join.getParentOperators();
+    final TopNKeyDesc topNKeyDesc = topNKey.getConf();
+
+    // For each join input,
+    for (Operator<? extends OperatorDesc> joinInput : joinInputs) {
+
+      // Map columns
+      final List<ExprNodeDesc> mappedColumns = mapColumns(mapColumns(topNKeyDesc.getKeyColumns(),
+          join.getColumnExprMap()), joinInput.getColumnExprMap());
+      if (mappedColumns.isEmpty()) {
+        continue;
+      }
+
+      // Copy down
+      final String mappedOrder = mapOrder(topNKeyDesc.getColumnSortOrder(),
+          ((ReduceSinkOperator) joinInput).getConf().getKeyCols(), mappedColumns);
+      final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), mappedOrder,
+          mappedColumns);
+      pushdown(copyDown(joinInput, newTopNKeyDesc));
+    }
+  }
+
+  private static boolean hasSameTopNKeyDesc(Operator<? extends OperatorDesc> operator,
+      TopNKeyDesc desc) {
+
+    if (operator instanceof TopNKeyOperator) {
+      final TopNKeyOperator topNKey = (TopNKeyOperator) operator;
+      final TopNKeyDesc opDesc = topNKey.getConf();
+      if (opDesc.isSame(desc)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  private static String mapOrder(String order, List<ExprNodeDesc> parentCols,
+      List<ExprNodeDesc> mappedCols) {
+
+    final StringBuilder builder = new StringBuilder();
+    int index = 0;
+    for (ExprNodeDesc mappedCol : mappedCols) {
+      if (parentCols.contains(mappedCol)) {
+        builder.append(order.charAt(index++));
+      } else {
+        builder.append("+");
+      }
+    }
+    return builder.toString();
+  }
+
+  private static List<ExprNodeDesc> mapColumns(List<ExprNodeDesc> columns,
+      Map<String, ExprNodeDesc> colExprMap) {
+
+    if (colExprMap == null) {
+      return columns;
+    }
+    final List<ExprNodeDesc> mappedColumns = new ArrayList<>();
+    for (ExprNodeDesc column : columns) {
+      final String columnName = column.getExprString();
+      if (colExprMap.containsKey(columnName)) {
+        mappedColumns.add(colExprMap.get(columnName));
+      }
+    }
+    return mappedColumns;
+  }
+
+  private static void moveDown(TopNKeyOperator topNKey) throws SemanticException {
+
+    assert topNKey.getNumParent() == 1;
+    final Operator<? extends OperatorDesc> parent = topNKey.getParentOperators().get(0);
+    final List<Operator<? extends OperatorDesc>> grandParents = parent.getParentOperators();
+    parent.removeChildAndAdoptItsChildren(topNKey);
+    for (Operator<? extends OperatorDesc> grandParent : grandParents) {
+      grandParent.replaceChild(parent, topNKey);
+    }
+    topNKey.setParentOperators(new ArrayList<>(grandParents));
+    topNKey.setChildOperators(new ArrayList<>(Collections.singletonList(parent)));
+    parent.setParentOperators(new ArrayList<>(Collections.singletonList(topNKey)));
+  }
+
+  private static boolean checkNullOrder(ReduceSinkDesc reduceSinkDesc) {
+
+    final String order = reduceSinkDesc.getOrder();
+    final String nullOrder = reduceSinkDesc.getNullOrder();
+    if (nullOrder == null) {
+      for (int i = 0; i < order.length(); i++) {
+        if (order.charAt(i) != '+') {
+          return false;
+        }
+      }
+    } else {
+      for (int i = 0; i < nullOrder.length(); i++) {
+        if (nullOrder.charAt(i) != 'a') {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+}
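Of the helpers, mapOrder is the subtle one: it rebuilds the sort-order string for the mapped key list, keeping the original direction for keys the parent also sorts and defaulting to ascending for the rest. Restated over plain strings (a hypothetical stand-in for the ExprNodeDesc version):

```java
// Rebuild a '+'/'-' order string for the mapped key columns.
import java.util.Arrays;
import java.util.List;

public class MapOrderSketch {
  static String mapOrder(String order, List<String> parentCols, List<String> mappedCols) {
    StringBuilder builder = new StringBuilder();
    int index = 0;
    for (String col : mappedCols) {
      // Keep the original direction for columns the parent sorts; fall back to '+'.
      builder.append(parentCols.contains(col) ? order.charAt(index++) : '+');
    }
    return builder.toString();
  }

  public static void main(String[] args) {
    // The parent sorts (a, b) as "-+"; the mapped keys are (a, c): 'a' keeps
    // its '-', while 'c' is not sorted by the parent and defaults to '+'.
    System.out.println(mapOrder("-+", Arrays.asList("a", "b"), Arrays.asList("a", "c"))); // "-+"
  }
}
```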
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index f316f09953..e2458cda13 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -54,6 +54,7 @@
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TerminalOperator;
 import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator;
+import org.apache.hadoop.hive.ql.exec.TopNKeyOperator;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -82,6 +83,7 @@
 import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism;
 import org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.TopNKeyProcessor;
+import org.apache.hadoop.hive.ql.optimizer.TopNKeyPushdownProcessor;
 import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication;
 import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits;
 import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer;
@@ -421,6 +423,12 @@ private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx,
     opRules.put(new RuleRegExp("Convert Join to Map-join",
        JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());
 
+    if (procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_TOPNKEY)) {
+      opRules.put(
+          new RuleRegExp("Top n key pushdown", TopNKeyOperator.getOperatorName() + "%"),
+          new TopNKeyPushdownProcessor());
+    }
+
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
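The new rule only enters the rule table when hive.optimize.topnkey is enabled, and it is registered among the stats-dependent optimizations so the join cases can consult operator statistics. The registration pattern in miniature (hypothetical rule/processor stand-ins, not Hive's Dispatcher API):

```java
// Pattern-keyed rules dispatched against operator names, with the new rule
// guarded by a config flag, as HIVE_OPTIMIZE_TOPNKEY guards it above.
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Consumer;

public class RuleRegistrationSketch {
  public static void main(String[] args) {
    boolean topNKeyEnabled = true;  // stands in for conf.getBoolVar(...)

    Map<String, Consumer<String>> rules = new LinkedHashMap<>();
    rules.put("JOIN%", op -> System.out.println("convert join: " + op));
    if (topNKeyEnabled) {
      rules.put("TNK%", op -> System.out.println("push down top n key: " + op));
    }

    // A trivial dispatcher: fire the first rule whose prefix matches.
    String operatorName = "TNK_15";
    rules.entrySet().stream()
        .filter(e -> operatorName.startsWith(e.getKey().replace("%", "")))
        .findFirst()
        .ifPresent(e -> e.getValue().accept(operatorName));
  }
}
```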
diff --git ql/src/test/queries/clientpositive/topnkey.q ql/src/test/queries/clientpositive/topnkey.q
index e02a41dd57..7a4f9f1a87 100644
--- ql/src/test/queries/clientpositive/topnkey.q
+++ ql/src/test/queries/clientpositive/topnkey.q
@@ -24,8 +24,29 @@ SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5;
 
 SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5;
 
-explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5;
+explain
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
 
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5;
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
 
+explain
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+explain
+SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+set hive.optimize.topnkey=false;
+
+explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
diff --git ql/src/test/queries/clientpositive/vector_topnkey.q ql/src/test/queries/clientpositive/vector_topnkey.q
index e1b7d26afe..2a0cab80ae 100644
--- ql/src/test/queries/clientpositive/vector_topnkey.q
+++ ql/src/test/queries/clientpositive/vector_topnkey.q
@@ -14,17 +14,39 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.stats.fetch.column.stats=true;
 set hive.cbo.enable=true;
 
-explain vectorization detail
+explain vectorization
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5;
 
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5;
 
-explain vectorization detail
+explain vectorization
 SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5;
 
 SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5;
 
-explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5;
+explain vectorization
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
 
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5;
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+explain vectorization
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+explain vectorization
+SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+explain vectorization
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+set hive.optimize.topnkey=false;
+
+explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON
(src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; diff --git ql/src/test/results/clientpositive/llap/bucket_groupby.q.out ql/src/test/results/clientpositive/llap/bucket_groupby.q.out index 726d46b479..0ddde98fa3 100644 --- ql/src/test/results/clientpositive/llap/bucket_groupby.q.out +++ ql/src/test/results/clientpositive/llap/bucket_groupby.q.out @@ -64,15 +64,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '100') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 10 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: key (type: string) @@ -197,15 +197,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '101') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 10 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: key (type: string) @@ -304,15 +304,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '101') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: length(key) (type: int) - outputColumnNames: _col0 + Top N Key Operator + sort order: + + keys: length(key) (type: int) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: int) + top n: 10 + Select Operator + expressions: length(key) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: _col0 (type: int) @@ -391,15 +391,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '101') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: abs(length(key)) (type: int) - outputColumnNames: _col0 + Top N Key Operator + sort order: + + keys: abs(length(key)) (type: int) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: int) + top n: 10 + Select Operator + expressions: abs(length(key)) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: _col0 (type: int) @@ -479,15 +479,15 @@ STAGE PLANS: alias: 
clustergroupby filterExpr: (ds = '101') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 10 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: key (type: string) @@ -587,15 +587,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '101') (type: boolean) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: value + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: value (type: string) + top n: 10 + Select Operator + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: value (type: string) @@ -1193,15 +1193,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '102') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 10 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() bucketGroup: true @@ -1302,15 +1302,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '102') (type: boolean) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: value + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: value (type: string) + top n: 10 + Select Operator + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: value (type: string) @@ -1511,15 +1511,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '103') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 10 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE 
Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() bucketGroup: true @@ -1620,15 +1620,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '103') (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Top N Key Operator + sort order: ++ + keys: key (type: string), value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: key (type: string), value (type: string) + top n: 10 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/llap/check_constraint.q.out ql/src/test/results/clientpositive/llap/check_constraint.q.out index ec1ed64fe8..95ae2bbce2 100644 --- ql/src/test/results/clientpositive/llap/check_constraint.q.out +++ ql/src/test/results/clientpositive/llap/check_constraint.q.out @@ -1671,15 +1671,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), UDFToInteger(key) (type: int), CAST( key AS decimal(5,2)) (type: decimal(5,2)) - outputColumnNames: _col0, _col1, _col2, _col3 + Top N Key Operator + sort order: ++ + keys: key (type: string), value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col1 (type: string) + top n: 10 + Select Operator + expressions: key (type: string), value (type: string), UDFToInteger(key) (type: int), CAST( key AS decimal(5,2)) (type: decimal(5,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: min(_col2), max(_col3) keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index a98191653f..edda11f898 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -1264,10 +1264,10 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_7] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 - Top N Key Operator [TNK_15] (rows=10 width=101) - keys:_col1, _col0,sort order:++,top n:1 - Select Operator [SEL_5] (rows=10 width=101) - Output:["_col0","_col1"] + Select Operator [SEL_5] (rows=10 width=101) + Output:["_col0","_col1"] + Top N Key Operator [TNK_15] (rows=10 width=101) + keys:(((UDFToFloat(_col1) + _col2) + 1.0) + 2.0), _col3,sort order:++,top n:1 Group By Operator [GBY_4] (rows=10 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 1 [SIMPLE_EDGE] llap diff --git ql/src/test/results/clientpositive/llap/explainuser_2.q.out ql/src/test/results/clientpositive/llap/explainuser_2.q.out index d8216817c8..601561e517 100644 --- ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ 
ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -303,14 +303,14 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized, llap - File Output Operator [FS_217] - Limit [LIM_216] (rows=2 width=285) + File Output Operator [FS_231] + Limit [LIM_230] (rows=2 width=285) Number of rows:100 - Select Operator [SEL_215] (rows=2 width=285) + Select Operator [SEL_229] (rows=2 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_214] - Group By Operator [GBY_213] (rows=2 width=285) + SHUFFLE [RS_228] + Group By Operator [GBY_227] (rows=2 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_49] @@ -326,103 +326,123 @@ Stage-0 PartitionCols:_col15, _col17 Select Operator [SEL_40] (rows=2 width=447) Output:["_col4","_col5","_col14","_col15","_col17"] - Merge Join Operator [MERGEJOIN_187] (rows=2 width=447) - Conds:RS_37._col4, _col2=RS_38._col4, _col2(Inner),Output:["_col0","_col1","_col14","_col15","_col17"] - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_38] - PartitionCols:_col4, _col2 - Merge Join Operator [MERGEJOIN_186] (rows=7 width=356) - Conds:RS_212._col0=RS_200._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_200] - PartitionCols:_col0 - Select Operator [SEL_197] (rows=25 width=178) - Output:["_col0"] - Filter Operator [FIL_194] (rows=25 width=178) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_3] (rows=500 width=178) - default@src,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 15 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_212] - PartitionCols:_col0 - Select Operator [SEL_211] (rows=7 width=531) - Output:["_col0","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_210] (rows=7 width=534) - predicate:((v1 = 'srv1') and k1 is not null and k2 is not null and k3 is not null and v2 is not null and v3 is not null) - TableScan [TS_18] (rows=85 width=534) - default@sr,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_37] - PartitionCols:_col4, _col2 - Merge Join Operator [MERGEJOIN_185] (rows=2 width=352) - Conds:RS_34._col1=RS_209._col1(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Map 14 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_209] - PartitionCols:_col1 - Select Operator [SEL_208] (rows=2 width=180) - Output:["_col1"] - Filter Operator [FIL_207] (rows=2 width=175) - predicate:((key = 'src1key') and value is not null) - TableScan [TS_15] (rows=25 width=175) - default@src1,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_34] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_184] (rows=2 width=352) - Conds:RS_31._col3=RS_206._col1(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Map 13 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_206] + Top N Key Operator [TNK_191] + keys:_col0, _col14,sort order:++,top n:100 + Merge Join Operator [MERGEJOIN_187] (rows=2 width=447) + Conds:RS_37._col4, _col2=RS_38._col4, _col2(Inner),Output:["_col0","_col1","_col14","_col15","_col17"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_38] + PartitionCols:_col4, _col2 + Top N Key Operator [TNK_197] + keys:_col2,sort order:+,top n:100 + Merge Join Operator [MERGEJOIN_186] (rows=7 width=356) + 
Conds:RS_226._col0=RS_211._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_211] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=25 width=178) + Output:["_col0"] + Filter Operator [FIL_205] (rows=25 width=178) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_3] (rows=500 width=178) + default@src,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 15 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_226] + PartitionCols:_col0 + Select Operator [SEL_225] (rows=7 width=531) + Output:["_col0","_col2","_col3","_col4","_col5"] + Top N Key Operator [TNK_224] + keys:k2,sort order:+,top n:100 + Filter Operator [FIL_223] (rows=7 width=534) + predicate:((v1 = 'srv1') and k1 is not null and k2 is not null and k3 is not null and v2 is not null and v3 is not null) + TableScan [TS_18] (rows=85 width=534) + default@sr,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_37] + PartitionCols:_col4, _col2 + Top N Key Operator [TNK_192] + keys:_col0, _col2,sort order:++,top n:100 + Merge Join Operator [MERGEJOIN_185] (rows=2 width=352) + Conds:RS_34._col1=RS_222._col1(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_222] PartitionCols:_col1 - Select Operator [SEL_205] (rows=6 width=185) + Select Operator [SEL_221] (rows=2 width=180) Output:["_col1"] - Filter Operator [FIL_204] (rows=6 width=178) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_12] (rows=2000 width=178) - default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_183] (rows=2 width=443) - Conds:RS_203._col0=RS_199._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_199] - PartitionCols:_col0 - Select Operator [SEL_196] (rows=2 width=178) - Output:["_col0"] - Filter Operator [FIL_193] (rows=2 width=178) - predicate:((value = 'd1value') and key is not null) - Please refer to the previous TableScan [TS_3] - <-Map 12 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_203] - PartitionCols:_col0 - Select Operator [SEL_202] (rows=7 width=531) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_201] (rows=7 width=534) - predicate:((v3 = 'ssv3') and k1 is not null and k2 is not null and k3 is not null and v1 is not null and v2 is not null) - TableScan [TS_6] (rows=85 width=534) - default@ss_n1,ss_n1,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] + Filter Operator [FIL_220] (rows=2 width=175) + predicate:((key = 'src1key') and value is not null) + TableScan [TS_15] (rows=25 width=175) + default@src1,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_34] + PartitionCols:_col1 + Top N Key Operator [TNK_193] + keys:_col0, _col2,sort order:++,top n:100 + Merge Join Operator [MERGEJOIN_184] (rows=2 width=352) + Conds:RS_31._col3=RS_219._col1(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_219] + PartitionCols:_col1 + Select Operator [SEL_218] (rows=6 width=185) + Output:["_col1"] + Filter Operator [FIL_217] (rows=6 width=178) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_12] (rows=2000 width=178) + 
default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_31] + PartitionCols:_col3 + Top N Key Operator [TNK_194] + keys:_col0, _col2,sort order:++,top n:100 + Merge Join Operator [MERGEJOIN_183] (rows=2 width=443) + Conds:RS_216._col0=RS_212._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_212] + PartitionCols:_col0 + Select Operator [SEL_210] (rows=2 width=178) + Output:["_col0"] + Top N Key Operator [TNK_207] + keys:key,sort order:+,top n:100 + Filter Operator [FIL_204] (rows=2 width=178) + predicate:((value = 'd1value') and key is not null) + Please refer to the previous TableScan [TS_3] + <-Map 12 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_216] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=7 width=531) + Output:["_col0","_col1","_col2","_col3","_col4"] + Top N Key Operator [TNK_214] + keys:k1, k2,sort order:++,top n:100 + Filter Operator [FIL_213] (rows=7 width=534) + predicate:((v3 = 'ssv3') and k1 is not null and k2 is not null and k3 is not null and v1 is not null and v2 is not null) + TableScan [TS_6] (rows=85 width=534) + default@ss_n1,ss_n1,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_44] PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_182] (rows=170 width=269) - Conds:RS_191._col0=RS_198._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_198] - PartitionCols:_col0 - Select Operator [SEL_195] (rows=25 width=178) - Output:["_col0"] - Filter Operator [FIL_192] (rows=25 width=178) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_191] - PartitionCols:_col0 - Select Operator [SEL_190] (rows=170 width=356) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_189] (rows=170 width=356) - predicate:(k1 is not null and v2 is not null and v3 is not null) - TableScan [TS_0] (rows=170 width=356) - default@cs,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v2","k3","v3"] + Top N Key Operator [TNK_189] + keys:_col2,sort order:+,top n:100 + Merge Join Operator [MERGEJOIN_182] (rows=170 width=269) + Conds:RS_202._col0=RS_209._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_209] + PartitionCols:_col0 + Select Operator [SEL_206] (rows=25 width=178) + Output:["_col0"] + Filter Operator [FIL_203] (rows=25 width=178) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_202] + PartitionCols:_col0 + Select Operator [SEL_201] (rows=170 width=356) + Output:["_col0","_col1","_col2","_col3"] + Top N Key Operator [TNK_200] + keys:k3,sort order:+,top n:100 + Filter Operator [FIL_199] (rows=170 width=356) + predicate:(k1 is not null and v2 is not null and v3 is not null) + TableScan [TS_0] (rows=170 width=356) + default@cs,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v2","k3","v3"] PREHOOK: query: explain SELECT x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/llap/limit_pushdown.q.out ql/src/test/results/clientpositive/llap/limit_pushdown.q.out index 4fc1419acd..804b418123 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown.q.out @@ 
-195,15 +195,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 20 + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 Group By Operator aggregations: sum(_col1) keys: _col0 (type: string) @@ -296,15 +296,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 20 + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 Group By Operator aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) @@ -401,15 +401,15 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double) - outputColumnNames: cdouble + Top N Key Operator + sort order: + + keys: cdouble (type: double) Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: cdouble (type: double) + top n: 20 + Select Operator + expressions: cdouble (type: double) + outputColumnNames: cdouble Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 Group By Operator keys: cdouble (type: double) mode: hash @@ -932,15 +932,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 2 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 2 Group By Operator aggregations: count() keys: key (type: string) @@ -954,11 +954,15 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) - Top N Key Operator - sort order: + - keys: key (type: string) + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + top n: 3 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 
500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-          top n: 3
           Group By Operator
             aggregations: count()
             keys: key (type: string)
diff --git ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
index 1027bfe85a..7d75933f86 100644
--- ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
+++ ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
@@ -196,15 +196,15 @@ STAGE PLANS:
     TableScan
       alias: src
       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-      Select Operator
-        expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double)
-        outputColumnNames: _col0, _col1
+      Top N Key Operator
+        sort order: +
+        keys: value (type: string)
        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-        Top N Key Operator
-          sort order: +
-          keys: _col0 (type: string)
+        top n: 20
+        Select Operator
+          expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-          top n: 20
          Group By Operator
            aggregations: sum(_col1)
            keys: _col0 (type: string)
@@ -311,15 +311,15 @@ STAGE PLANS:
     TableScan
       alias: src
       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-      Select Operator
-        expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double)
-        outputColumnNames: _col0, _col1
+      Top N Key Operator
+        sort order: +
+        keys: value (type: string)
        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-        Top N Key Operator
-          sort order: +
-          keys: _col0 (type: string)
+        top n: 20
+        Select Operator
+          expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-          top n: 20
          Group By Operator
            aggregations: sum(_col1), count(_col1)
            keys: _col0 (type: string)
@@ -430,15 +430,15 @@ STAGE PLANS:
     TableScan
       alias: alltypesorc
       Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE
-      Select Operator
-        expressions: cdouble (type: double)
-        outputColumnNames: cdouble
+      Top N Key Operator
+        sort order: +
+        keys: cdouble (type: double)
        Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE
-        Top N Key Operator
-          sort order: +
-          keys: cdouble (type: double)
+        top n: 20
+        Select Operator
+          expressions: cdouble (type: double)
+          outputColumnNames: cdouble
          Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE
-          top n: 20
          Group By Operator
            keys: cdouble (type: double)
            mode: hash
diff --git ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out
index bf12ce919e..19996896bb 100644
--- ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out
+++ ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out
@@ -72,17 +72,17 @@ STAGE PLANS:
   Map Operator Tree:
       TableScan
        alias: srcpart
-        Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE
+        Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: PARTIAL
        Filter Operator
          predicate: (ds <= '2008-04-08') (type: boolean)
-          Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: PARTIAL
          Select Operator
            expressions: key (type: string), value (type: string), ds (type: string), hr (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: PARTIAL
            File Output Operator
              compressed: false
-              Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: PARTIAL
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -91,29 +91,29 @@ STAGE PLANS:
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
            outputColumnNames: key, value, ds, hr
-            Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: PARTIAL
            Group By Operator
              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
              keys: ds (type: string), hr (type: string)
              mode: hash
              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL
              Reduce Output Operator
                key expressions: _col0 (type: string), _col1 (type: string)
                sort order: ++
                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL
                value expressions: _col2 (type: struct), _col3 (type: struct)
        Filter Operator
          predicate: (ds > '2008-04-08') (type: boolean)
-          Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: PARTIAL
          Select Operator
            expressions: key (type: string), value (type: string), hr (type: string)
            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: PARTIAL
            File Output Operator
              compressed: false
-              Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: PARTIAL
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -122,18 +122,18 @@ STAGE PLANS:
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
            outputColumnNames: key, value, hr
-            Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: PARTIAL
            Group By Operator
              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
              keys: '2008-12-31' (type: string), hr (type: string)
              mode: hash
              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: PARTIAL
              Reduce Output Operator
                key expressions: '2008-12-31' (type: string), _col1 (type: string)
                sort order: ++
                Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string)
-                Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: PARTIAL
                value expressions: _col2 (type: struct), _col3 (type: struct)
    Execution mode: llap
    LLAP IO: no inputs
@@ -145,14 +145,14 @@ STAGE PLANS:
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL
          Select Operator
            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL
            File Output Operator
              compressed: false
-              Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -165,14 +165,14 @@ STAGE PLANS:
          keys: '2008-12-31' (type: string), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: PARTIAL
          Select Operator
            expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: PARTIAL
            File Output Operator
              compressed: false
-              Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: PARTIAL
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out
index 9f54bbc163..c3602fe7f0 100644
--- ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out
+++ ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out
@@ -53,15 +53,15 @@ STAGE PLANS:
     TableScan
       alias: srcpart
       filterExpr: (ds is not null and hr is not null) (type: boolean)
-      Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE
+      Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: PARTIAL
      Select Operator
        expressions: key (type: string), value (type: string), hr (type: string)
        outputColumnNames: _col0, _col1, _col2
-        Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE
+        Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: PARTIAL
        Reduce Output Operator
          sort order:
          Map-reduce partition columns: _col0 (type: string)
-          Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: PARTIAL
          value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -71,10 +71,10 @@ STAGE PLANS:
    Select Operator
      expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string)
      outputColumnNames: _col0, _col1, _col2
-      Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE
+      Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: PARTIAL
      File Output Operator
        compressed: false
-        Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE
+        Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: PARTIAL
        table:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -83,18 +83,18 @@ STAGE PLANS:
    Select Operator
      expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string)
      outputColumnNames: key, value, ds, hr
-      Statistics: Num rows: 2000 Data size: 912000 Basic stats: COMPLETE Column stats: COMPLETE
+      Statistics: Num rows: 2000 Data size: 912000 Basic stats: COMPLETE Column stats: PARTIAL
      Group By Operator
        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
        keys: ds (type: string), hr (type: string)
        mode: hash
        outputColumnNames: _col0, _col1, _col2, _col3
-        Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE
+        Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: PARTIAL
        Reduce Output Operator
          key expressions: _col0 (type: string), _col1 (type: string)
          sort order: ++
          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-          Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: PARTIAL
          value expressions: _col2 (type: struct), _col3 (type: struct)
 Reducer 3
     Execution mode: llap
@@ -104,14 +104,14 @@ STAGE PLANS:
        keys: KEY._col0 (type: string), KEY._col1 (type: string)
        mode: mergepartial
        outputColumnNames: _col0, _col1, _col2, _col3
-        Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE
+        Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: PARTIAL
        Select Operator
          expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: PARTIAL
          File Output Operator
            compressed: false
-            Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: PARTIAL
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out
index f00343ea83..ec4d25e3cd 100644
--- ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out
+++ ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out
@@ -58,14 +58,14 @@ STAGE PLANS:
     TableScan
       alias: srcpart
       filterExpr: (ds is not null and hr is not null) (type: boolean)
-      Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE
+      Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: PARTIAL
      Select Operator
        expressions: key (type: string), value (type: string), ds (type: string), hr (type: string)
        outputColumnNames: _col0, _col1, _col2, _col3
-        Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE
+        Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: PARTIAL
        File Output Operator
          compressed: false
-          Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: PARTIAL
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -74,18 +74,18 @@ STAGE PLANS:
      Select Operator
        expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
        outputColumnNames: key, value, ds, hr
-        Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE
+        Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: PARTIAL
        Group By Operator
          aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
          keys: ds (type: string), hr (type: string)
          mode: hash
          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: PARTIAL
          Reduce Output Operator
            key expressions: _col0 (type: string), _col1 (type: string)
            sort order: ++
            Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-            Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: PARTIAL
            value expressions: _col2 (type: struct), _col3 (type: struct)
 Execution mode: llap
 LLAP IO: no inputs
@@ -97,14 +97,14 @@ STAGE PLANS:
        keys: KEY._col0 (type: string), KEY._col1 (type: string)
        mode: mergepartial
        outputColumnNames: _col0, _col1, _col2, _col3
-        Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE
+        Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: PARTIAL
        Select Operator
          expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: PARTIAL
          File Output Operator
            compressed: false
-            Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: PARTIAL
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/offset_limit.q.out ql/src/test/results/clientpositive/llap/offset_limit.q.out
index 97d2ac25b4..5d33787201 100644
--- ql/src/test/results/clientpositive/llap/offset_limit.q.out
+++ ql/src/test/results/clientpositive/llap/offset_limit.q.out
@@ -22,15 +22,15 @@ STAGE PLANS:
     TableScan
       alias: src
       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-      Select Operator
-        expressions: key (type: string), substr(value, 5) (type: string)
-        outputColumnNames: _col0, _col1
+      Top N Key Operator
+        sort order: +
+        keys: key (type: string)
        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-        Top N Key Operator
-          sort order: +
-          keys: _col0 (type: string)
+        top n: 20
+        Select Operator
+          expressions: key (type: string), substr(value, 5) (type: string)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-          top n: 20
          Group By Operator
            aggregations: sum(_col1)
            keys: _col0 (type: string)
diff --git ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
index 2e8d5f375f..03fcd10969 100644
--- ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
+++ ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
@@ -197,15 +197,15 @@ STAGE PLANS:
     TableScan
       alias: src
       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-      Select Operator
-        expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double)
-        outputColumnNames: _col0, _col1
+      Top N Key Operator
+        sort order: +
+        keys: value (type: string)
        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-        Top N Key Operator
-          sort order: +
-          keys: _col0 (type: string)
+        top n: 30
+        Select Operator
+          expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-          top n: 30
          Group By Operator
            aggregations: sum(_col1)
            keys: _col0 (type: string)
@@ -299,15 +299,15 @@ STAGE PLANS:
     TableScan
       alias: src
       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-      Select Operator
-        expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double)
-        outputColumnNames: _col0, _col1
+      Top N Key Operator
+        sort order: +
+        keys: value (type: string)
        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-        Top N Key Operator
-          sort order: +
-          keys: _col0 (type: string)
+        top n: 30
+        Select Operator
+          expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-          top n: 30
          Group By Operator
            aggregations: sum(_col1), count(_col1)
            keys: _col0 (type: string)
@@ -405,15 +405,15 @@ STAGE PLANS:
     TableScan
       alias: alltypesorc
       Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE
-      Select Operator
-        expressions: cdouble (type: double)
-        outputColumnNames: cdouble
+      Top N Key Operator
+        sort order: +
+        keys: cdouble (type: double)
        Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE
-        Top N Key Operator
-          sort order: +
-          keys: cdouble (type: double)
+        top n: 30
+        Select Operator
+          expressions: cdouble (type: double)
+          outputColumnNames: cdouble
          Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE
-          top n: 30
          Group By Operator
            keys: cdouble (type: double)
            mode: hash
diff --git ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out
index a804e3c193..2a2c06fe2d 100644
--- ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out
+++ ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out
@@ -66,11 +66,11 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru
 PREHOOK: query: select count(*) from orc_struct_type
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from orc_struct_type
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1023
 PREHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10
 PREHOOK: type: QUERY
@@ -142,11 +142,11 @@ STAGE PLANS:
 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001
 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002
 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003
@@ -160,11 +160,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001 str2001
 2 2002 str2002
 3 2003 str2003
@@ -178,11 +178,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001
 2 2002
 3 2003
@@ -229,31 +229,31 @@ STAGE PLANS:
           predicateExpression: FilterLongColGreaterLongScalar(col 4:int, val 500)(children: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int)
         predicate: (st1.f1 > 500) (type: boolean)
         Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
-        Select Operator
-          expressions: st1.f1 (type: int)
-          outputColumnNames: _col0
-          Select Vectorization:
-              className: VectorSelectOperator
-              native: true
-              projectedOutputColumnNums: [4]
-              selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int
+        Top N Key Operator
+          sort order: +
+          keys: st1.f1 (type: int)
          Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
-          Top N Key Operator
-            sort order: +
-            keys: _col0 (type: int)
-            Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
-            top n: 10
-            Top N Key Vectorization:
-                className: VectorTopNKeyOperator
-                keyExpressions: col 4:int
+          top n: 10
+          Top N Key Vectorization:
+              className: VectorTopNKeyOperator
+              keyExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int
+              native: true
+          Select Operator
+            expressions: st1.f1 (type: int)
+            outputColumnNames: _col0
+            Select Vectorization:
+                className: VectorSelectOperator
                native: true
+                projectedOutputColumnNums: [5]
+                selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 5:int
+            Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
            Group By Operator
              aggregations: sum(_col0)
              Group By Vectorization:
-                  aggregators: VectorUDAFSumLong(col 4:int) -> bigint
+                  aggregators: VectorUDAFSumLong(col 5:int) -> bigint
                  className: VectorGroupByOperator
                  groupByMode: HASH
-                  keyExpressions: col 4:int
+                  keyExpressions: col 5:int
                  native: false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
@@ -340,11 +340,11 @@ STAGE PLANS:
 PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 501 501
 502 502
 503 503
@@ -371,20 +371,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru
 PREHOOK: query: select count(*) from orc_struct_type
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from orc_struct_type
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1024
 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001
 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002
 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003
@@ -398,11 +398,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001 str2001
 2 2002 str2002
 3 2003 str2003
@@ -416,11 +416,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001
 2 2002
 3 2003
@@ -434,11 +434,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 501 501
 502 502
 503 503
@@ -465,20 +465,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru
 PREHOOK: query: select count(*) from orc_struct_type
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from orc_struct_type
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1025
 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001
 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002
 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003
@@ -492,11 +492,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001 str2001
 2 2002 str2002
 3 2003 str2003
@@ -510,11 +510,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001
 2 2002
 3 2003
@@ -528,11 +528,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 501 501
 502 502
 503 503
diff --git ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out
index baeb60e045..36a94fc811 100644
--- ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out
+++ ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out
@@ -78,11 +78,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag
 PREHOOK: query: select count(*) from parquet_complex_types
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_complex_types
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from parquet_complex_types
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_complex_types
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1023
 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
 PREHOOK: type: QUERY
@@ -154,11 +154,11 @@ STAGE PLANS:
 PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_complex_types
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_complex_types
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 [100,101] 100 101 100 0
 [102,103] 102 103 103 1
 [104,105] 104 105 104 0
@@ -205,31 +205,31 @@ STAGE PLANS:
           predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array, col 0:int) -> 6:int)
         predicate: (l1[0] > 1000) (type: boolean)
         Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE
-        Select Operator
-          expressions: l1[1] (type: int), l1[0] (type: int)
-          outputColumnNames: _col0, _col1
-          Select Vectorization:
-              className: VectorSelectOperator
-              native: true
-              projectedOutputColumnNums: [6, 7]
-              selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int, ListIndexColScalar(col 2:array, col 0:int) -> 7:int
+        Top N Key Operator
+          sort order: +
+          keys: l1[1] (type: int)
          Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE
-          Top N Key Operator
-            sort order: +
-            keys: _col0 (type: int)
-            Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE
-            top n: 10
-            Top N Key Vectorization:
-                className: VectorTopNKeyOperator
-                keyExpressions: col 6:int
+          top n: 10
+          Top N Key Vectorization:
+              className: VectorTopNKeyOperator
+              keyExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int
+              native: true
+          Select Operator
+            expressions: l1[1] (type: int), l1[0] (type: int)
+            outputColumnNames: _col0, _col1
+            Select Vectorization:
+                className: VectorSelectOperator
                native: true
+                projectedOutputColumnNums: [7, 8]
+                selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int
+            Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE
            Group By Operator
              aggregations: sum(_col1)
              Group By Vectorization:
-                  aggregators: VectorUDAFSumLong(col 7:int) -> bigint
+                  aggregators: VectorUDAFSumLong(col 8:int) -> bigint
                  className: VectorGroupByOperator
                  groupByMode: HASH
-                  keyExpressions: col 6:int
+                  keyExpressions: col 7:int
                  native: false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
@@ -343,11 +343,11 @@ STAGE PLANS:
 PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_complex_types
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_complex_types
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 2144 2145
 2142 2143
 2140 2141
@@ -376,11 +376,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag
 PREHOOK: query: select count(*) from parquet_complex_types
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_complex_types
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from parquet_complex_types
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_complex_types
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1024
 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
 PREHOOK: type: QUERY
@@ -452,11 +452,11 @@ STAGE PLANS:
 PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_complex_types
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_complex_types
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 [100,101] 100 101 100 0
 [102,103] 102 103 103 1
 [104,105] 104 105 104 0
@@ -503,31 +503,31 @@ STAGE PLANS:
           predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array, col 0:int) -> 6:int)
         predicate: (l1[0] > 1000) (type: boolean)
         Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE
-        Select Operator
-          expressions: l1[1] (type: int), l1[0] (type: int)
-          outputColumnNames: _col0, _col1
-          Select Vectorization:
-              className: VectorSelectOperator
-              native: true
-              projectedOutputColumnNums: [6, 7]
-              selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int, ListIndexColScalar(col 2:array, col 0:int) -> 7:int
+        Top N Key Operator
+          sort order: +
+          keys: l1[1] (type: int)
          Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE
-          Top N Key Operator
-            sort order: +
-            keys: _col0 (type: int)
-            Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE
-            top n: 10
-            Top N Key Vectorization:
-                className: VectorTopNKeyOperator
-                keyExpressions: col 6:int
+          top n: 10
+          Top N Key Vectorization:
+              className: VectorTopNKeyOperator
+              keyExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int
+              native: true
+          Select Operator
+            expressions: l1[1] (type: int), l1[0] (type: int)
+            outputColumnNames: _col0, _col1
+            Select Vectorization:
+                className: VectorSelectOperator
                native: true
+                projectedOutputColumnNums: [7, 8]
+                selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int
+            Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE
            Group By Operator
              aggregations: sum(_col1)
              Group By Vectorization:
-                  aggregators: VectorUDAFSumLong(col 7:int) -> bigint
+                  aggregators: VectorUDAFSumLong(col 8:int) -> bigint
                  className: VectorGroupByOperator
                  groupByMode: HASH
-                  keyExpressions: col 6:int
+                  keyExpressions: col 7:int
                  native: false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
@@ -641,11 +641,11 @@ STAGE PLANS:
 PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_complex_types
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_complex_types
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 2146 2147
 2144 2145
 2142 2143
@@ -674,11 +674,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag
 PREHOOK: query: select count(*) from parquet_complex_types
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_complex_types
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from parquet_complex_types
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_complex_types
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1025
 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
 PREHOOK: type: QUERY
@@ -750,11 +750,11 @@ STAGE PLANS:
 PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_complex_types
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_complex_types
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 [100,101] 100 101 100 0
 [102,103] 102 103 103 1
 [104,105] 104 105 104 0
@@ -801,31 +801,31 @@ STAGE PLANS:
           predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array, col 0:int) -> 6:int)
         predicate: (l1[0] > 1000) (type: boolean)
         Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE
-        Select Operator
-          expressions: l1[1] (type: int), l1[0] (type: int)
-          outputColumnNames: _col0, _col1
-          Select Vectorization:
-              className: VectorSelectOperator
-              native: true
-              projectedOutputColumnNums: [6, 7]
-              selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int, ListIndexColScalar(col 2:array, col 0:int) -> 7:int
+        Top N Key Operator
+          sort order: +
+          keys: l1[1] (type: int)
          Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE
-          Top N Key Operator
-            sort order: +
-            keys: _col0 (type: int)
-            Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE
-            top n: 10
-            Top N Key Vectorization:
-                className: VectorTopNKeyOperator
-                keyExpressions: col 6:int
+          top n: 10
+          Top N Key Vectorization:
+              className: VectorTopNKeyOperator
+              keyExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int
+              native: true
+          Select Operator
+            expressions: l1[1] (type: int), l1[0] (type: int)
+            outputColumnNames: _col0, _col1
+            Select Vectorization:
+                className: VectorSelectOperator
                native: true
+                projectedOutputColumnNums: [7, 8]
+                selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int
+            Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE
            Group By Operator
              aggregations: sum(_col1)
              Group By Vectorization:
-                  aggregators: VectorUDAFSumLong(col 7:int) -> bigint
+                  aggregators: VectorUDAFSumLong(col 8:int) -> bigint
                  className: VectorGroupByOperator
                  groupByMode: HASH
-                  keyExpressions: col 6:int
+                  keyExpressions: col 7:int
                  native: false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
@@ -939,11 +939,11 @@ STAGE PLANS:
 PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_complex_types
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_complex_types
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 2148 2149
 2146 2147
 2144 2145
diff --git ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
index b036cddbea..78e2804795 100644
--- ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
+++ ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
@@ -88,11 +88,11 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging)
 PREHOOK: query: select count(*) from parquet_map_type
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_map_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from parquet_map_type
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_map_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1023
 PREHOOK: query: explain vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123],
 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10
@@ -167,12 +167,12 @@ PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456
 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_map_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123],
 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_map_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2
 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2
 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2
@@ -221,31 +221,31 @@ STAGE PLANS:
           predicateExpression: FilterStringColLikeStringScalar(col 8:string, pattern v100%)(children: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string)
         predicate: (stringmap['k1'] like 'v100%') (type: boolean)
         Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE
-        Select Operator
-          expressions: stringmap['k1'] (type: string), intmap[123] (type: int), doublemap[123.123] (type: double)
-          outputColumnNames: _col0, _col1, _col2
-          Select Vectorization:
-              className: VectorSelectOperator
-              native: true
-              projectedOutputColumnNums: [8, 9, 10]
-              selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 9:int, VectorUDFMapIndexDecimalScalar(col 3:map, key: 123.123) -> 10:double
+        Top N Key Operator
+          sort order: +
+          keys: stringmap['k1'] (type: string)
          Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE
-          Top N Key Operator
-            sort order: +
-            keys: _col0 (type: string)
-            Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE
-            top n: 10
-            Top N Key Vectorization:
-                className: VectorTopNKeyOperator
-                keyExpressions: col 8:string
+          top n: 10
+          Top N Key Vectorization:
+              className: VectorTopNKeyOperator
+              keyExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string
+              native: true
+          Select Operator
+            expressions: stringmap['k1'] (type: string), intmap[123] (type: int), doublemap[123.123] (type: double)
+            outputColumnNames: _col0, _col1, _col2
+            Select Vectorization:
+                className: VectorSelectOperator
                native: true
+                projectedOutputColumnNums: [9, 10, 11]
+                selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 9:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 10:int, VectorUDFMapIndexDecimalScalar(col 3:map, key: 123.123) -> 11:double
+            Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE
            Group By Operator
              aggregations: sum(_col1), sum(_col2)
              Group By Vectorization:
-                  aggregators: VectorUDAFSumLong(col 9:int) -> bigint, VectorUDAFSumDouble(col 10:double) -> double
+                  aggregators: VectorUDAFSumLong(col 10:int) -> bigint, VectorUDAFSumDouble(col 11:double) -> double
                  className: VectorGroupByOperator
                  groupByMode: HASH
-                  keyExpressions: col 8:string
+                  keyExpressions: col 9:string
                  native: false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1]
@@ -360,12 +360,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'
 from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_map_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1']
 from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_map_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 199 100.1 v100
 1999 1000.1 v1000
 2001 1001.1 v1001
@@ -396,22 +396,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging)
 PREHOOK: query: select count(*) from parquet_map_type
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_map_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from parquet_map_type
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_map_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1024
 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123],
 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_map_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123],
 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_map_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2
 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2
 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2
@@ -426,12 +426,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'
 from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_map_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1']
 from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_map_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 199 100.1 v100
 1999 1000.1 v1000
 2001 1001.1 v1001
@@ -462,22 +462,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging)
 PREHOOK: query: select count(*) from parquet_map_type
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_map_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from parquet_map_type
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_map_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1025
 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123],
 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_map_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123],
 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_map_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2
 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2
 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2
@@ -492,12 +492,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'
 from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_map_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1']
 from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_map_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 199 100.1 v100
 1999 1000.1 v1000
 2001 1001.1 v1001
diff --git ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out
index fec8093617..7a48ba7018 100644
--- ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out
+++ ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out
@@ -66,11 +66,11 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging)
 PREHOOK: query: select count(*) from parquet_struct_type
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from parquet_struct_type
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1023
 PREHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10
 PREHOOK: type: QUERY
@@ -142,11 +142,11 @@ STAGE PLANS:
 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001
 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002
 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003
@@ -160,11 +160,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001 str2001
 2 2002 str2002
 3 2003 str2003
@@ -178,11 +178,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001
 2 2002
 3 2003
@@ -229,31 +229,31 @@ STAGE PLANS:
           predicateExpression: FilterLongColGreaterLongScalar(col 4:int, val 500)(children: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int)
         predicate: (st1.f1 > 500) (type: boolean)
         Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
-        Select Operator
-          expressions: st1.f1 (type: int)
-          outputColumnNames: _col0
-          Select Vectorization:
-              className: VectorSelectOperator
-              native: true
-              projectedOutputColumnNums: [4]
-              selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int
+        Top N Key Operator
+          sort order: +
+          keys: st1.f1 (type: int)
          Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
-          Top N Key Operator
-            sort order: +
-            keys: _col0 (type: int)
-            Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
-            top n: 10
-            Top N Key Vectorization:
-                className: VectorTopNKeyOperator
-                keyExpressions: col 4:int
+          top n: 10
+          Top N Key Vectorization:
+              className: VectorTopNKeyOperator
+              keyExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int
+              native: true
+          Select Operator
+            expressions: st1.f1 (type: int)
+            outputColumnNames: _col0
+            Select Vectorization:
+                className: VectorSelectOperator
                native: true
+                projectedOutputColumnNums: [5]
+                selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 5:int
+            Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
            Group By Operator
              aggregations: sum(_col0)
              Group By Vectorization:
-                  aggregators: VectorUDAFSumLong(col 4:int) -> bigint
+                  aggregators: VectorUDAFSumLong(col 5:int) -> bigint
                  className: VectorGroupByOperator
                  groupByMode: HASH
-                  keyExpressions: col 4:int
+                  keyExpressions: col 5:int
                  native: false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
@@ -340,11 +340,11 @@ STAGE PLANS:
 PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 501 501
 502 502
 503 503
@@ -371,20 +371,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging)
 PREHOOK: query: select count(*) from parquet_struct_type
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from parquet_struct_type
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1024
 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001
 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002
 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003
@@ -398,11 +398,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001 str2001
 2 2002 str2002
 3 2003 str2003
@@ -416,11 +416,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001
 2 2002
 3 2003
@@ -434,11 +434,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 501 501
 502 502
 503 503
@@ -465,20 +465,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging)
 PREHOOK: query: select count(*) from parquet_struct_type
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select count(*) from parquet_struct_type
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1025
 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001
 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002
 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003
@@ -492,11 +492,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001 str2001
 2 2002 str2002
 3 2003 str2003
@@ -510,11 +510,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 1 2001
 2 2002
 3 2003
@@ -528,11 +528,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_struct_type
-PREHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_struct_type
-POSTHOOK: Output: hdfs://### HDFS PATH ###
+#### A masked pattern was here ####
 501 501
 502 502
 503 503
diff --git ql/src/test/results/clientpositive/llap/topnkey.q.out ql/src/test/results/clientpositive/llap/topnkey.q.out
index c1d8874bb9..fd25a046b8 100644
--- ql/src/test/results/clientpositive/llap/topnkey.q.out
+++ ql/src/test/results/clientpositive/llap/topnkey.q.out
@@ -22,15 +22,15 @@ STAGE PLANS:
     TableScan
       alias: src
      Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-      Select Operator
-        expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
-        outputColumnNames: _col0, _col1
+      Top N Key Operator
+        sort order: +
+        keys: key (type: string)
        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-        Top N Key Operator
-          sort order: +
-          keys: _col0 (type: string)
+        top n: 5
+        Select Operator
+          expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-          top n: 5
          Group By Operator
            aggregations: sum(_col1)
            keys: _col0 (type: string)
@@ -122,15 +122,15 @@ STAGE PLANS:
     TableScan
       alias: src
      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-      Select Operator
-        expressions: key (type: string)
-        outputColumnNames: key
+      Top N Key Operator
+        sort order: +
+        keys: key (type: string)
        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-        Top N Key Operator
-          sort order: +
-          keys: key (type: string)
+        top n: 5
+        Select Operator
+          expressions: key (type: string)
+          outputColumnNames: key
          Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-          top n: 5
          Group By Operator
            keys: key (type: string)
            mode: hash
@@ -194,16 +194,12 @@ POSTHOOK: Input: default@src
 100
 103
 104
-PREHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -213,8 +209,9 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -226,6 +223,153 @@ STAGE PLANS:
       Filter Operator
         predicate: key is not null (type: boolean)
         Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+        Top N Key Operator
+          sort order: +
+          keys: key (type: string)
+          top n: 5
+          Select Operator
+            expressions: key (type: string)
+            outputColumnNames: _col0
+            Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+    Execution mode: llap
+    LLAP IO: no inputs
+ Map 5
+    Map Operator Tree:
+      TableScan
+        alias: src2
+        filterExpr: key is not null (type: boolean)
+        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+        Filter Operator
+          predicate: key is not null (type: boolean)
+          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+          Top N Key Operator
+            sort order: ++
+            keys: key (type: string), value (type: string)
+            top n: 5
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col1 (type: string)
+    Execution mode: llap
+    LLAP IO: no inputs
+ Reducer 2
+    Execution mode: llap
+    Reduce Operator Tree:
+      Merge Join Operator
+        condition map:
+             Inner Join 0 to 1
+        keys:
+          0 _col0 (type: string)
+          1 _col0 (type: string)
+        outputColumnNames: _col0, _col2
+        Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+        Top N Key Operator
+          sort order: ++
+          keys: _col0 (type: string), _col2 (type: string)
+          Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+          top n: 5
+          Group By Operator
+            keys: _col0 (type: string), _col2 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+ Reducer 3
+    Execution mode: llap
+    Reduce Operator Tree:
+      Group By Operator
+        keys: KEY._col0 (type: string), KEY._col1 (type: string)
+        mode: mergepartial
+        outputColumnNames: _col0, _col1
+        Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+        Reduce Output Operator
+          key expressions: _col0 (type: string)
+          sort order: +
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+          TopN Hash 
Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: key (type: string) + top n: 5 Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -237,15 +381,146 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src2 - filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: 
string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num 
rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: key (type: string), value (type: string) + top n: 5 Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -263,29 +538,44 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Right Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE @@ -303,16 +593,273 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER 
BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 140976 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 1000 Data size: 140976 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 51976 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 51976 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 51976 
Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 51976 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 51976 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: 
COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 1000 Data size: 140976 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  keys: _col0 (type: string), _col2 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                    Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE
+                    TopN Hash Memory Usage: 0.1
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+                  value expressions: _col1 (type: string)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 5
+                  Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
 0	val_0
-0	val_0
-0	val_0
+10	val_10
+100	val_100
+103	val_103
+104	val_104
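The topnkey.q.out changes above are the core of this patch: for the GROUP BY ... ORDER BY ... LIMIT 5 join queries, a Top N Key Operator with top n: 5 now appears ahead of the shuffle on the map side and, in several of the plans, again above the Merge Join, in each case before the Select and Group By that used to come first. The operator is only a filter: it forwards a row while the row's key can still rank among the first n keys seen, and the downstream Limit still enforces the exact row count, so an over-permissive filter costs shuffle volume but cannot change results. A minimal, self-contained sketch of the bounded-heap idea (a hypothetical illustration, not the Hive operator itself):

    import java.util.Comparator;
    import java.util.PriorityQueue;

    // Keeps the n smallest keys seen so far; canForward() reports whether a
    // row with the given key can still belong to the final top n.
    final class TopNKeyFilter<K> {
      private final int n;
      private final Comparator<K> asc;     // the plan's ascending sort order
      private final PriorityQueue<K> kept; // max-heap over the kept keys

      TopNKeyFilter(int n, Comparator<K> asc) {
        this.n = n;
        this.asc = asc;
        this.kept = new PriorityQueue<>(n, asc.reversed());
      }

      boolean canForward(K key) {
        if (kept.size() < n) {
          kept.offer(key);
          return true;                     // heap not yet full: keep everything
        }
        int cmp = asc.compare(key, kept.peek());
        if (cmp > 0) {
          return false;                    // strictly worse than the worst kept key
        }
        if (cmp < 0) {
          kept.offer(key);                 // better key arrived: evict the worst
          kept.poll();
        }
        return true;                       // ties with the worst key are forwarded
      }
    }

With n = 2 and natural ordering, the key stream 5, 3, 7, 1 forwards 5, 3 and 1 but drops 7, which mirrors what the top n filter does ahead of each Reduce Output Operator in the plans above.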
diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
index 8c74a924c2..ffc6816861 100644
--- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
+++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
@@ -133,23 +133,23 @@ STAGE PLANS:
                   Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
-                  Select Operator
-                    expressions: i (type: int)
-                    outputColumnNames: _col0
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [2]
+                  Top N Key Operator
+                    sort order: +
+                    keys: i (type: int)
                     Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: _col0 (type: int)
-                      Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 10
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
-                          keyExpressions: col 2:int
+                    top n: 10
+                    Top N Key Vectorization:
+                        className: VectorTopNKeyOperator
+                        keyExpressions: col 2:int
+                        native: true
+                    Select Operator
+                      expressions: i (type: int)
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
                           native: true
+                          projectedOutputColumnNums: [2]
+                      Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50)
                         Group By Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out
index b58de039f2..9eecba5f6c 100644
--- ql/src/test/results/clientpositive/llap/vector_char_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out
@@ -93,24 +93,24 @@ STAGE PLANS:
                   Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
-                  Select Operator
-                    expressions: value (type: char(20)), UDFToInteger(key) (type: int)
-                    outputColumnNames: _col0, _col1
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [1, 3]
-                        selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
+                  Top N Key Operator
+                    sort order: +
+                    keys: value (type: char(20))
                     Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: _col0 (type: char(20))
-                      Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 5
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
-                          keyExpressions: col 1:char(20)
+                    top n: 5
+                    Top N Key Vectorization:
+                        className: VectorTopNKeyOperator
+                        keyExpressions: col 1:char(20)
+                        native: true
+                    Select Operator
+                      expressions: value (type: char(20)), UDFToInteger(key) (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
                           native: true
+                          projectedOutputColumnNums: [1, 3]
+                          selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
+                      Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: sum(_col1), count()
                         Group By Vectorization:
@@ -298,24 +298,24 @@ STAGE PLANS:
                   Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
-                  Select Operator
-                    expressions: value (type: char(20)), UDFToInteger(key) (type: int)
-                    outputColumnNames: _col0, _col1
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [1, 3]
-                        selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
+                  Top N Key Operator
+                    sort order: -
+                    keys: value (type: char(20))
                     Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: -
-                      keys: _col0 (type: char(20))
-                      Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 5
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
-                          keyExpressions: col 1:char(20)
+                    top n: 5
+                    Top N Key Vectorization:
+
keyExpressions: col 1:char(20) + native: true + Select Operator + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [1, 3] + selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int + Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count() Group By Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out index 1f49804ca6..4eabff4bbb 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -60,30 +60,30 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] + Top N Key Operator + sort order: +++ + keys: a (type: string), b (type: string) Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++ - keys: a (type: string), b (type: string), 0L (type: bigint) - Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string, col 1:string + native: true + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -120,7 +120,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -270,30 +270,30 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] + Top N Key Operator + sort order: +++ + keys: a (type: string), b (type: string) Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++ - keys: a (type: string), b (type: string), 0L (type: bigint) - Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - 
top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string, col 1:string + native: true + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -330,7 +330,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -480,30 +480,30 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] + Top N Key Operator + sort order: +++ + keys: a (type: string), b (type: string) Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++ - keys: a (type: string), b (type: string), 0L (type: bigint) - Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string, col 1:string + native: true + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -540,7 +540,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -690,28 +690,28 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: a (type: string), b (type: string), c (type: string) - outputColumnNames: a, b, c - Select Vectorization: - className: VectorSelectOperator - 
native: true - projectedOutputColumnNums: [0, 1, 2] + Top N Key Operator + sort order: ++++ + keys: a (type: string), b (type: string), c (type: string) Statistics: Num rows: 6 Data size: 3312 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++++ - keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) - Statistics: Num rows: 6 Data size: 3312 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string, col 1:string, col 2:string + native: true + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 3312 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 5:bigint + keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] @@ -747,7 +747,7 @@ STAGE PLANS: includeColumns: [0, 1, 2] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -894,23 +894,23 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: a (type: string) - outputColumnNames: a - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] + Top N Key Operator + sort order: + + keys: a (type: string) Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: a (type: string) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string + native: true + Select Operator + expressions: a (type: string) + outputColumnNames: a + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -1084,31 +1084,31 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: double) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [6] - selectExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 1:string) -> 5:double) -> 6:double + Top N Key Operator + sort order: + + keys: (UDFToDouble(a) + UDFToDouble(b)) (type: double) 
                    Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
-                    Top N Key Operator
-                      sort order: +
-                      keys: _col0 (type: double)
-                      Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
-                      top n: 10
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
-                          keyExpressions: col 6:double
+                    top n: 10
+                    Top N Key Vectorization:
+                        className: VectorTopNKeyOperator
+                        keyExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 1:string) -> 5:double) -> 6:double
+                        native: true
+                    Select Operator
+                      expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: double)
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
                           native: true
+                          projectedOutputColumnNums: [7]
+                          selectExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 1:string) -> 5:double) -> 7:double
+                      Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count()
                         Group By Vectorization:
                             aggregators: VectorUDAFCountStar(*) -> bigint
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 6:double
+                            keyExpressions: col 7:double
                             native: false
                             vectorProcessingMode: HASH
                             projectedOutputColumnNums: [0]
@@ -1145,7 +1145,7 @@ STAGE PLANS:
                     includeColumns: [0, 1]
                     dataColumns: a:string, b:string, c:string
                     partitionColumnCount: 0
-                    scratchColumnTypeNames: [double, double, double]
+                    scratchColumnTypeNames: [double, double, double, double]
         Reducer 2
             Execution mode: vectorized, llap
             Reduce Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
index bdcc286181..4c115014c0 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
@@ -258,23 +258,23 @@ STAGE PLANS:
                   Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
-                  Select Operator
-                    expressions: ss_ticket_number (type: int)
-                    outputColumnNames: ss_ticket_number
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [9]
+                  Top N Key Operator
+                    sort order: +
+                    keys: ss_ticket_number (type: int)
                     Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: ss_ticket_number (type: int)
-                      Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 20
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
-                          keyExpressions: col 9:int
+                    top n: 20
+                    Top N Key Vectorization:
+                        className: VectorTopNKeyOperator
+                        keyExpressions: col 9:int
+                        native: true
+                    Select Operator
+                      expressions: ss_ticket_number (type: int)
+                      outputColumnNames: ss_ticket_number
+                      Select Vectorization:
+                          className: VectorSelectOperator
                           native: true
+                          projectedOutputColumnNums: [9]
+                      Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
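In all of these plans the sort order line encodes one character per key column, + for ascending and - for descending (the second vector_char_2.q.out hunk above exercises the descending case), so ++ in the join plans means two ascending keys compared lexicographically. A comparator for such a string can be assembled mechanically; a hedged sketch, where the per-column comparators are stand-ins rather than Hive APIs:

    import java.util.Comparator;
    import java.util.List;

    final class SortOrderComparators {
      // Builds a row comparator from a plan-style order string such as "++-".
      static <R> Comparator<R> fromOrderString(String order,
                                               List<Comparator<R>> ascByColumn) {
        if (order.length() != ascByColumn.size()) {
          throw new IllegalArgumentException("need one order char per key column");
        }
        Comparator<R> result = null;
        for (int i = 0; i < order.length(); i++) {
          // '-' in the plan's sort order means this key is compared descending.
          Comparator<R> next = order.charAt(i) == '-'
              ? ascByColumn.get(i).reversed()
              : ascByColumn.get(i);
          result = (result == null) ? next : result.thenComparing(next);
        }
        return result;
      }
    }

For the sort order: ++ entries above this would be, for example, fromOrderString("++", List.of(byKey, byValue)): compare on the first key and fall back to the second only on ties, which is exactly the order in which a top-n filter would rank composite keys.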
diff --git ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
index e81d7dfc38..a2857cf341 100644
--- ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
+++ ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
@@ -317,16 +317,20 @@ STAGE PLANS:
                   Filter Operator
                     predicate: ((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') and s_store_sk is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: s_store_sk (type: int), s_state (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                    Top N Key Operator
+                      sort order: +
+                      keys: s_state (type: string)
+                      top n: 100
+                      Select Operator
+                        expressions: s_store_sk (type: int), s_state (type: string)
+                        outputColumnNames: _col0, _col1
                         Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
index a4f32f16fa..de5bb5cf9a 100644
--- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
+++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
@@ -343,29 +343,29 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                  Select Operator
-                    expressions: concat(concat(concat('Quarter ', CAST( UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0D) + 1.0D)) AS STRING)), '-'), CAST( year(dt) AS STRING)) (type: string)
-                    outputColumnNames: _col0
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [20]
-                        selectExpressions: StringGroupConcatColCol(col 18:string, col 19:string)(children: StringGroupColConcatStringScalar(col 19:string, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 18:string)(children: CastLongToString(col 14:int)(children: CastDoubleToLong(col 16:double)(children: DoubleColAddDoubleScalar(col 17:double, val 1.0)(children: DoubleColDivideDoubleScalar(col 16:double, val 3.0)(children: CastLongToDouble(col 15:int)(children: LongColSubtractLongScalar(col 14:int, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 14:int) -> 15:int) -> 16:double) -> 17:double) -> 16:double) -> 14:int) -> 18:string) -> 19:string) -> 18:string, CastLongToString(col 14:int)(children: VectorUDFYearDate(col 12, field YEAR) -> 14:int) -> 19:string) -> 20:string
+                  Top N Key Operator
+                    sort order: +
+                    keys: concat(concat(concat('Quarter ', CAST( UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0D) + 1.0D)) AS STRING)), '-'), CAST( year(dt) AS STRING)) (type: string)
                     Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE
-                    Top N Key Operator
-                      sort order: +
-                      keys: _col0 (type: string)
-                      Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE
-                      top n: 50
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
-                          keyExpressions: col 20:string
+                    top n: 50
+                    Top N Key Vectorization:
+                        className: VectorTopNKeyOperator
+                        keyExpressions: StringGroupConcatColCol(col 18:string, col 19:string)(children: StringGroupColConcatStringScalar(col 19:string, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 18:string)(children: CastLongToString(col 14:int)(children: CastDoubleToLong(col 16:double)(children: DoubleColAddDoubleScalar(col 17:double, val 1.0)(children: DoubleColDivideDoubleScalar(col 16:double, val 3.0)(children: CastLongToDouble(col 15:int)(children: LongColSubtractLongScalar(col 14:int, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 14:int) -> 15:int) -> 16:double) -> 17:double) -> 16:double) -> 14:int) -> 18:string) -> 19:string) -> 18:string, CastLongToString(col 14:int)(children: VectorUDFYearDate(col 12, field YEAR) -> 14:int) -> 19:string) -> 20:string
+                        native: true
+                    Select Operator
+                      expressions: concat(concat(concat('Quarter ', CAST( UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0D) + 1.0D)) AS STRING)), '-'), CAST( year(dt) AS STRING)) (type: string)
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
                           native: true
+                          projectedOutputColumnNums: [21]
+                          selectExpressions: StringGroupConcatColCol(col 18:string, col 19:string)(children: StringGroupColConcatStringScalar(col 19:string, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 18:string)(children: CastLongToString(col 14:int)(children: CastDoubleToLong(col 16:double)(children: DoubleColAddDoubleScalar(col 17:double, val 1.0)(children: DoubleColDivideDoubleScalar(col 16:double, val 3.0)(children: CastLongToDouble(col 15:int)(children: LongColSubtractLongScalar(col 14:int, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 14:int) -> 15:int) -> 16:double) -> 17:double) -> 16:double) -> 14:int) -> 18:string) -> 19:string) -> 18:string, CastLongToString(col 14:int)(children: VectorUDFYearDate(col 12, field YEAR) -> 14:int) -> 19:string) -> 21:string
+                      Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 20:string
+                            keyExpressions: col 21:string
                             native: false
                             vectorProcessingMode: HASH
                             projectedOutputColumnNums: []
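The vector_string_concat.q.out hunk above shows the vectorized side of the reordering: because the Top N Key operator now runs before the Select that used to feed it, Top N Key Vectorization carries the full key expression tree itself (the whole StringGroupConcatColCol chain rather than col 20:string), and the Select re-evaluates the expression into a fresh scratch column, which is why its projected output moves from column 20 to column 21. A batch-style sketch of the same filter, with a primitive key column and a selection vector standing in for Hive's VectorizedRowBatch internals (hypothetical class, ascending order assumed):

    import java.util.Collections;
    import java.util.PriorityQueue;

    // Filters a batch in place: keeps only rows whose key can still be
    // among the n smallest keys seen so far.
    final class VectorTopNKeySketch {
      private final int n;
      private final PriorityQueue<Long> kept =
          new PriorityQueue<>(Collections.reverseOrder()); // max-heap

      VectorTopNKeySketch(int n) { this.n = n; }

      // selected[0..inUse) lists the live row indices; returns the new count.
      int filterBatch(long[] keyColumn, int[] selected, int inUse) {
        int out = 0;
        for (int i = 0; i < inUse; i++) {
          int row = selected[i];
          if (offer(keyColumn[row])) {
            selected[out++] = row; // row survives: compact the selection vector
          }
        }
        return out;
      }

      private boolean offer(long key) {
        if (kept.size() < n) { kept.offer(key); return true; }
        long worst = kept.peek();
        if (key > worst) return false;             // can no longer reach the top n
        if (key < worst) { kept.offer(key); kept.poll(); }
        return true;
      }
    }

The real operator evaluates arbitrary key expressions and honors the plan's sort order string; the sketch only shows the shape of the loop, one heap probe per selected row followed by a compacted selection vector handed to the child operator.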
diff --git ql/src/test/results/clientpositive/llap/vector_topnkey.q.out ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
index 16803c9544..c1b2bccee0 100644
--- ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
+++ ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
@@ -1,7 +1,7 @@
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -26,37 +26,17 @@ STAGE PLANS:
                 TableScan
                   alias: src
                   Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                  TableScan Vectorization:
-                      native: true
-                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct]
-                  Select Operator
-                    expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
-                    outputColumnNames: _col0, _col1
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [0, 4]
-                        selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int
+                  Top N Key Operator
+                    sort order: +
+                    keys: key (type: string)
                     Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: _col0 (type: string)
+                    top n: 5
+                    Select Operator
+                      expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 5
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
-                          keyExpressions: col 0:string
-                          native: true
                       Group By Operator
                         aggregations: sum(_col1)
-                        Group By Vectorization:
-                            aggregators: VectorUDAFSumLong(col 4:int) -> bigint
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 0:string
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: [0]
                         keys: _col0 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1
@@ -65,12 +45,6 @@ STAGE PLANS:
                           key expressions: _col0 (type: string)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: string)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkStringOperator
-                              keyColumnNums: [0]
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: [1]
                           Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                           TopN Hash Memory Usage: 0.1
                           value expressions: _col1 (type: bigint)
@@ -85,38 +59,17 @@ STAGE PLANS:
               allNative: false
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  includeColumns: [0, 1]
-                  dataColumns: key:string, value:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: [string, bigint]
         Reducer 2
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:string, VALUE._col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -193,10 +123,10 @@ POSTHOOK: Input: default@src 100 200 103 206 104 208 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -221,34 +151,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: key - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 5 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string - native: true Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: key (type: string) mode: hash outputColumnNames: _col0 @@ -257,12 +169,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap @@ -276,36 +182,16 @@ STAGE PLANS: allNative: false 
usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY._col0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 @@ -313,12 +199,6 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -326,36 +206,19 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -381,11 +244,11 @@ POSTHOOK: Input: default@src 100 103 104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: 
QUERY PLAN VECTORIZATION: enabled: true @@ -400,8 +263,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -410,34 +274,198 @@ STAGE PLANS: alias: src1 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:string) predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: key (type: string) + top n: 5 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: key (type: string), value (type: string) + top n: 5 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + top 
n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern 
was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: key (type: string) + top n: 5 Select Operator expressions: key (type: string) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -450,46 +478,188 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: src2 - filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:string) - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- 
Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: key (type: string), value (type: string) + top n: 5 Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -503,68 +673,66 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Right Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic 
stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -577,16 +745,307 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + 
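
[Editorial note, not part of the patch.] The inner, LEFT OUTER, and RIGHT OUTER plans above all share one shape: a Top N Key Operator with "top n: 5" runs before the shuffle and the group-by, so rows whose keys can no longer reach the final LIMIT are dropped early. As a rough illustration of the mechanism only — a minimal sketch in plain Java, with invented names, not the Hive class — the filter can be thought of as a bounded heap of the best keys seen so far. Ties are deliberately forwarded, so the filter may let through too much but never drops a qualifying row:

    import java.util.Comparator;
    import java.util.PriorityQueue;

    // Illustrative sketch of a top-n key filter; class and method
    // names are invented for this example.
    public class TopNKeyFilter<K> {
      private final int topN;
      private final Comparator<K> sortOrder;
      // Max-heap w.r.t. the sort order, so peek() is the current
      // worst key that still qualifies for the top N.
      private final PriorityQueue<K> heap;

      public TopNKeyFilter(int topN, Comparator<K> sortOrder) {
        this.topN = topN;
        this.sortOrder = sortOrder;
        this.heap = new PriorityQueue<>(topN + 1, sortOrder.reversed());
      }

      // True while a row with this key may still reach the top N.
      public boolean canForward(K key) {
        if (heap.size() == topN && sortOrder.compare(key, heap.peek()) > 0) {
          return false;            // strictly worse than the current N-th key
        }
        heap.offer(key);
        if (heap.size() > topN) {
          heap.poll();             // shed the worst key
        }
        return true;               // forward; ties err on the safe side
      }
    }

For "top n: 5" with ascending sort order ("+") this would be built as, e.g., new TopNKeyFilter<>(5, Comparator.<String>naturalOrder()). Downstream operators still enforce the exact LIMIT, so the filter only has to be conservative, never exact.
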
enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 140976 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 1000 Data size: 140976 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 51976 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 51976 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 51976 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 51976 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 51976 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + 
TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 140976 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 -0 val_0 -0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 
3d5bea143b..b38e446a07 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -483,23 +483,23 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] + Top N Key Operator + sort order: + + keys: ctinyint (type: tinyint) Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: ctinyint (type: tinyint) - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:tinyint + top n: 20 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:tinyint + native: true + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator diff --git ql/src/test/results/clientpositive/perf/tez/query10.q.out ql/src/test/results/clientpositive/perf/tez/query10.q.out index 799510080a..72d0d7f679 100644 --- ql/src/test/results/clientpositive/perf/tez/query10.q.out +++ ql/src/test/results/clientpositive/perf/tez/query10.q.out @@ -685,15 +685,15 @@ STAGE PLANS: Filter Operator predicate: (_col15 is not null or _col17 is not null) (type: boolean) Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int) - outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Top N Key Operator + sort order: ++++++++ + keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int) Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++++++++ - keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int) + top n: 100 + Select Operator + expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int) + outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE - top n: 100 Group By Operator aggregations: count() keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), 
_col13 (type: int) diff --git ql/src/test/results/clientpositive/perf/tez/query15.q.out ql/src/test/results/clientpositive/perf/tez/query15.q.out index a4adc2ed1c..d2cef95f54 100644 --- ql/src/test/results/clientpositive/perf/tez/query15.q.out +++ ql/src/test/results/clientpositive/perf/tez/query15.q.out @@ -315,15 +315,15 @@ STAGE PLANS: Filter Operator predicate: ((_col3 = 'CA') or (_col3 = 'GA') or (_col3 = 'WA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col7 (type: decimal(7,2)) - outputColumnNames: _col4, _col7 + Top N Key Operator + sort order: + + keys: _col4 (type: string) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: _col4 (type: string) + top n: 100 + Select Operator + expressions: _col4 (type: string), _col7 (type: decimal(7,2)) + outputColumnNames: _col4, _col7 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - top n: 100 Group By Operator aggregations: sum(_col7) keys: _col4 (type: string) diff --git ql/src/test/results/clientpositive/perf/tez/query17.q.out ql/src/test/results/clientpositive/perf/tez/query17.q.out index 6821094e74..8c259834c9 100644 --- ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -814,15 +814,15 @@ STAGE PLANS: 1 Map 19 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Select Operator - expressions: _col9 (type: string), _col10 (type: string), _col25 (type: string), _col5 (type: int), _col21 (type: int), _col14 (type: int), UDFToDouble(_col5) (type: double), (UDFToDouble(_col5) * UDFToDouble(_col5)) (type: double), UDFToDouble(_col21) (type: double), (UDFToDouble(_col21) * UDFToDouble(_col21)) (type: double), UDFToDouble(_col14) (type: double), (UDFToDouble(_col14) * UDFToDouble(_col14)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Top N Key Operator + sort order: +++ + keys: _col9 (type: string), _col10 (type: string), _col25 (type: string) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++ - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + top n: 100 + Select Operator + expressions: _col9 (type: string), _col10 (type: string), _col25 (type: string), _col5 (type: int), _col21 (type: int), _col14 (type: int), UDFToDouble(_col5) (type: double), (UDFToDouble(_col5) * UDFToDouble(_col5)) (type: double), UDFToDouble(_col21) (type: double), (UDFToDouble(_col21) * UDFToDouble(_col21)) (type: double), UDFToDouble(_col14) (type: double), (UDFToDouble(_col14) * UDFToDouble(_col14)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - top n: 100 Group By Operator aggregations: count(_col3), sum(_col3), sum(_col7), sum(_col6), count(_col4), sum(_col4), sum(_col9), sum(_col8), count(_col5), sum(_col5), sum(_col11), sum(_col10) keys: _col0 (type: string), _col1 (type: string), _col2 (type: 
string) diff --git ql/src/test/results/clientpositive/perf/tez/query26.q.out ql/src/test/results/clientpositive/perf/tez/query26.q.out index d1e8840363..f896fd53cc 100644 --- ql/src/test/results/clientpositive/perf/tez/query26.q.out +++ ql/src/test/results/clientpositive/perf/tez/query26.q.out @@ -191,52 +191,60 @@ STAGE PLANS: predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Top N Key Operator + sort order: + + keys: i_item_id (type: string) + top n: 100 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 1:string native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 0:int) -> int, VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFBloomFilter(col 0:int) -> binary - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0:int) -> int, VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFBloomFilter(col 0:int) -> binary + className: VectorGroupByOperator + groupByMode: HASH + native: false + 
vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1, 2] + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map Vectorization: enabled: true diff --git ql/src/test/results/clientpositive/perf/tez/query27.q.out ql/src/test/results/clientpositive/perf/tez/query27.q.out index 5bb57cbfe0..4e69eeb778 100644 --- ql/src/test/results/clientpositive/perf/tez/query27.q.out +++ ql/src/test/results/clientpositive/perf/tez/query27.q.out @@ -196,52 +196,60 @@ STAGE PLANS: predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Top N Key Operator + sort order: + + keys: i_item_id (type: string) + top n: 100 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 1:string native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), 
max(_col0), bloom_filter(_col0, expectedEntries=1000000) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 0:int) -> int, VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFBloomFilter(col 0:int) -> binary - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0:int) -> int, VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFBloomFilter(col 0:int) -> binary + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1, 2] + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map Vectorization: enabled: true @@ -516,12 +524,16 @@ STAGE PLANS: 1 Map 11 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col1 (type: int) + Top N Key Operator sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 (type: string) + keys: _col15 (type: 
string) + top n: 100 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 (type: string) Reducer 4 Reduce Operator Tree: Merge Join Operator @@ -532,15 +544,15 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col4, _col5, _col6, _col7, _col15, _col17 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col17 (type: string), _col15 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Top N Key Operator + sort order: +++ + keys: _col17 (type: string), _col15 (type: string) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++ - keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) + top n: 100 + Select Operator + expressions: _col17 (type: string), _col15 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - top n: 100 Group By Operator aggregations: sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4), sum(_col5), count(_col5) keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/perf/tez/query29.q.out ql/src/test/results/clientpositive/perf/tez/query29.q.out index 5ce09eeffd..e030f2f8d8 100644 --- ql/src/test/results/clientpositive/perf/tez/query29.q.out +++ ql/src/test/results/clientpositive/perf/tez/query29.q.out @@ -137,24 +137,32 @@ STAGE PLANS: predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 0:int), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 0:int, left 0, right 0), VectorInBloomFilterColDynamicValue)) predicate: ((cs_sold_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator + Top N Key Operator + sort order: + + keys: cs_quantity (type: int) + top n: 100 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 18:int native: true - projectedOutputColumnNums: [0, 3, 15, 18] - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator 
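
[Editorial note, not part of the patch.] The hunks above also show the key metadata the operator works from: a per-column sort-order string printed in the plans ("sort order: ++", "reduceColumnSortOrder"), where '+' marks an ascending key column and '-' a descending one. A hedged sketch of turning such a string into a composite comparator over key tuples — names are illustrative, and null ordering (the "reduceColumnNullOrder" lines also visible in this diff) is deliberately omitted:

    import java.util.Comparator;

    // Illustrative helper, not Hive API: builds a lexicographic
    // comparator from a '+'/'-' sort-order string.
    public final class SortOrders {
      private SortOrders() {}

      public static Comparator<Object[]> fromString(String columnSortOrder) {
        final boolean[] desc = new boolean[columnSortOrder.length()];
        for (int i = 0; i < desc.length; i++) {
          desc[i] = columnSortOrder.charAt(i) == '-';  // '-' means descending
        }
        return (a, b) -> {
          for (int i = 0; i < desc.length; i++) {
            @SuppressWarnings("unchecked")
            final int c = ((Comparable<Object>) a[i]).compareTo(b[i]);
            if (c != 0) {
              return desc[i] ? -c : c;  // flip the sign for descending columns
            }
          }
          return 0;                     // all key columns tie
        };
      }
    }

A plan line such as "sort order: ++" over (key, value) would correspond to SortOrders.fromString("++") applied to two-element key tuples; null keys are not handled in this sketch.
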
+ Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0, 3, 15, 18] Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized Map Vectorization: enabled: true @@ -693,16 +701,20 @@ STAGE PLANS: 1 Map 20 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Select Operator - expressions: _col18 (type: string), _col19 (type: string), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int), _col21 (type: string), _col22 (type: string) - outputColumnNames: _col1, _col2, _col8, _col13, _col14, _col16, _col21, _col22 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col14 (type: int), _col13 (type: int) - sort order: ++ - Map-reduce partition columns: _col14 (type: int), _col13 (type: int) + Top N Key Operator + sort order: ++++ + keys: _col18 (type: string), _col19 (type: string), _col21 (type: string), _col22 (type: string) + top n: 100 + Select Operator + expressions: _col18 (type: string), _col19 (type: string), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int), _col21 (type: string), _col22 (type: string) + outputColumnNames: _col1, _col2, _col8, _col13, _col14, _col16, _col21, _col22 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col8 (type: int), _col16 (type: int), _col21 (type: string), _col22 (type: string) + Reduce Output Operator + key expressions: _col14 (type: int), _col13 (type: int) + sort order: ++ + Map-reduce partition columns: _col14 (type: int), _col13 (type: int) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col8 (type: int), _col16 (type: int), _col21 (type: string), _col22 (type: string) Reducer 13 Execution mode: vectorized Reduce Vectorization: @@ -858,12 +870,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 
_col1 (type: int), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) + Top N Key Operator + sort order: + + keys: _col3 (type: int) + top n: 100 + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) Reducer 21 Execution mode: vectorized Reduce Vectorization: diff --git ql/src/test/results/clientpositive/perf/tez/query35.q.out ql/src/test/results/clientpositive/perf/tez/query35.q.out index bf542286bd..3cd51f758c 100644 --- ql/src/test/results/clientpositive/perf/tez/query35.q.out +++ ql/src/test/results/clientpositive/perf/tez/query35.q.out @@ -837,15 +837,15 @@ STAGE PLANS: Filter Operator predicate: (_col12 is not null or _col14 is not null) (type: boolean) Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) - outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10 + Top N Key Operator + sort order: ++++++ + keys: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++++++ - keys: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) + top n: 100 + Select Operator + expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) + outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE - top n: 100 Group By Operator aggregations: count(), sum(_col8), count(_col8), max(_col8), sum(_col9), count(_col9), max(_col9), sum(_col10), count(_col10), max(_col10) keys: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) diff --git ql/src/test/results/clientpositive/perf/tez/query40.q.out ql/src/test/results/clientpositive/perf/tez/query40.q.out index e7537d422d..30b7aa4d1e 100644 --- ql/src/test/results/clientpositive/perf/tez/query40.q.out +++ ql/src/test/results/clientpositive/perf/tez/query40.q.out @@ -445,15 +445,15 @@ STAGE PLANS: 1 Map 10 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Select Operator - expressions: _col14 (type: string), _col11 (type: string), CASE WHEN ((CAST( _col9 AS DATE) < DATE'1998-04-08')) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)), CASE WHEN ((CAST( _col9 AS DATE) >= DATE'1998-04-08')) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)) - outputColumnNames: _col0, _col1, _col2, _col3 + Top N Key Operator + sort order: ++ + keys: _col14 (type: string), _col11 (type: string) Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++ - 
keys: _col0 (type: string), _col1 (type: string) + top n: 100 + Select Operator + expressions: _col14 (type: string), _col11 (type: string), CASE WHEN ((CAST( _col9 AS DATE) < DATE'1998-04-08')) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)), CASE WHEN ((CAST( _col9 AS DATE) >= DATE'1998-04-08')) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - top n: 100 Group By Operator aggregations: sum(_col2), sum(_col3) keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/perf/tez/query43.q.out ql/src/test/results/clientpositive/perf/tez/query43.q.out index 6d372bb9f8..875825d906 100644 --- ql/src/test/results/clientpositive/perf/tez/query43.q.out +++ ql/src/test/results/clientpositive/perf/tez/query43.q.out @@ -261,15 +261,15 @@ STAGE PLANS: 1 Map 7 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Select Operator - expressions: _col8 (type: string), _col7 (type: string), CASE WHEN ((_col5 = 'Sunday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Monday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Tuesday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Wednesday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Thursday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Friday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Saturday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Top N Key Operator + sort order: ++ + keys: _col8 (type: string), _col7 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col1 (type: string) + top n: 100 + Select Operator + expressions: _col8 (type: string), _col7 (type: string), CASE WHEN ((_col5 = 'Sunday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Monday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Tuesday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Wednesday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Thursday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Friday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col5 = 'Saturday')) THEN (_col2) ELSE (null) END (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - top n: 100 Group By Operator aggregations: sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7), sum(_col8) keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/perf/tez/query45.q.out ql/src/test/results/clientpositive/perf/tez/query45.q.out index 199e530a39..70d5cb4f19 100644 --- ql/src/test/results/clientpositive/perf/tez/query45.q.out +++ ql/src/test/results/clientpositive/perf/tez/query45.q.out @@ -563,15 +563,15 @@ STAGE PLANS: Filter Operator predicate: ((substr(_col8, 1, 5)) IN ('85669', '86197', '88274', 
'83405', '86475', '85392', '85460', '80348', '81792') or CASE WHEN ((_col14 = 0L)) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col13 is null) THEN (null) WHEN ((_col15 < _col14)) THEN (null) ELSE (false) END) (type: boolean) Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col3, _col7, _col8 + Top N Key Operator + sort order: ++ + keys: _col8 (type: string), _col7 (type: string) Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++ - keys: _col8 (type: string), _col7 (type: string) + top n: 100 + Select Operator + expressions: _col3 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col3, _col7, _col8 Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE - top n: 100 Group By Operator aggregations: sum(_col3) keys: _col8 (type: string), _col7 (type: string) diff --git ql/src/test/results/clientpositive/perf/tez/query50.q.out ql/src/test/results/clientpositive/perf/tez/query50.q.out index 8e92a5f6ed..98a6013d89 100644 --- ql/src/test/results/clientpositive/perf/tez/query50.q.out +++ ql/src/test/results/clientpositive/perf/tez/query50.q.out @@ -553,15 +553,15 @@ STAGE PLANS: 1 Map 13 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Select Operator - expressions: _col14 (type: string), _col15 (type: int), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: string), CASE WHEN (((_col0 - _col7) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 30) and ((_col0 - _col7) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 60) and ((_col0 - _col7) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 90) and ((_col0 - _col7) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col0 - _col7) > 120)) THEN (1) ELSE (0) END (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Top N Key Operator + sort order: ++++++++++ + keys: _col14 (type: string), _col15 (type: int), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++++++++++ - keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + top n: 100 + Select Operator + expressions: _col14 (type: string), _col15 (type: int), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: string), CASE WHEN (((_col0 - _col7) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 30) and ((_col0 - _col7) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 60) and ((_col0 - _col7) <= 90))) 
THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 90) and ((_col0 - _col7) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col0 - _col7) > 120)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - top n: 100 Group By Operator aggregations: sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14) keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) diff --git ql/src/test/results/clientpositive/perf/tez/query66.q.out ql/src/test/results/clientpositive/perf/tez/query66.q.out index 9cd31cc1d3..0deaef779f 100644 --- ql/src/test/results/clientpositive/perf/tez/query66.q.out +++ ql/src/test/results/clientpositive/perf/tez/query66.q.out @@ -1032,24 +1032,24 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), (_col6 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col7 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col8 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col9 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col10 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col11 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col12 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col13 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col14 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col15 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col16 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col17 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] - selectExpressions: DecimalColDivideDecimalColumn(col 6:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 31:decimal(38,12), DecimalColDivideDecimalColumn(col 7:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 32:decimal(38,12), DecimalColDivideDecimalColumn(col 8:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 33:decimal(38,12), DecimalColDivideDecimalColumn(col 9:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 34:decimal(38,12), DecimalColDivideDecimalColumn(col 10:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 35:decimal(38,12), DecimalColDivideDecimalColumn(col 11:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 36:decimal(38,12), DecimalColDivideDecimalColumn(col 12:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 37:decimal(38,12), DecimalColDivideDecimalColumn(col 13:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 38:decimal(38,12), DecimalColDivideDecimalColumn(col 14:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 39:decimal(38,12), DecimalColDivideDecimalColumn(col 15:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 40:decimal(38,12), DecimalColDivideDecimalColumn(col 16:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 41:decimal(38,12), DecimalColDivideDecimalColumn(col 17:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 42:decimal(38,12) + Top N Key Operator + sort order: ++++++ + keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: PARTIAL Column stats: NONE - Top N Key Operator - sort order: ++++++ - keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: PARTIAL Column stats: NONE - top n: 100 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string, col 1:int, col 2:string, col 3:string, col 4:string, col 5:string + top n: 100 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string, col 1:int, col 2:string, col 3:string, col 4:string, col 5:string + native: true + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), (_col6 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col7 / CAST( _col1 
AS decimal(10,0))) (type: decimal(38,12)), (_col8 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col9 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col10 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col11 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col12 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col13 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col14 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col15 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col16 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col17 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + selectExpressions: DecimalColDivideDecimalColumn(col 6:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 31:decimal(38,12), DecimalColDivideDecimalColumn(col 7:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 32:decimal(38,12), DecimalColDivideDecimalColumn(col 8:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 33:decimal(38,12), DecimalColDivideDecimalColumn(col 9:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 34:decimal(38,12), DecimalColDivideDecimalColumn(col 10:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 35:decimal(38,12), DecimalColDivideDecimalColumn(col 11:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 36:decimal(38,12), DecimalColDivideDecimalColumn(col 12:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 37:decimal(38,12), DecimalColDivideDecimalColumn(col 13:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 38:decimal(38,12), DecimalColDivideDecimalColumn(col 14:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 39:decimal(38,12), DecimalColDivideDecimalColumn(col 15:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 40:decimal(38,12), DecimalColDivideDecimalColumn(col 16:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 41:decimal(38,12), DecimalColDivideDecimalColumn(col 17:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 42:decimal(38,12) + 
Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: sum(_col6), sum(_col7), sum(_col8), sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14), sum(_col15), sum(_col16), sum(_col17), sum(_col18), sum(_col19), sum(_col20), sum(_col21), sum(_col22), sum(_col23), sum(_col24), sum(_col25), sum(_col26), sum(_col27), sum(_col28), sum(_col29), sum(_col30), sum(_col31), sum(_col32), sum(_col33), sum(_col34), sum(_col35), sum(_col36), sum(_col37), sum(_col38), sum(_col39), sum(_col40), sum(_col41) Group By Vectorization: @@ -1291,24 +1291,24 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 Statistics: Num rows: 105417161 Data size: 14333685343 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), (_col6 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col7 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col8 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col9 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col10 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col11 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col12 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col13 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col14 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col15 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col16 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col17 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] - selectExpressions: DecimalColDivideDecimalColumn(col 6:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 31:decimal(38,12), DecimalColDivideDecimalColumn(col 7:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 
1:int) -> 30:decimal(10,0)) -> 32:decimal(38,12), DecimalColDivideDecimalColumn(col 8:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 33:decimal(38,12), DecimalColDivideDecimalColumn(col 9:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 34:decimal(38,12), DecimalColDivideDecimalColumn(col 10:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 35:decimal(38,12), DecimalColDivideDecimalColumn(col 11:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 36:decimal(38,12), DecimalColDivideDecimalColumn(col 12:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 37:decimal(38,12), DecimalColDivideDecimalColumn(col 13:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 38:decimal(38,12), DecimalColDivideDecimalColumn(col 14:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 39:decimal(38,12), DecimalColDivideDecimalColumn(col 15:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 40:decimal(38,12), DecimalColDivideDecimalColumn(col 16:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 41:decimal(38,12), DecimalColDivideDecimalColumn(col 17:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 42:decimal(38,12) + Top N Key Operator + sort order: ++++++ + keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: PARTIAL Column stats: NONE - Top N Key Operator - sort order: ++++++ - keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: PARTIAL Column stats: NONE - top n: 100 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string, col 1:int, col 2:string, col 3:string, col 4:string, col 5:string + top n: 100 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string, col 1:int, col 2:string, col 3:string, col 4:string, col 5:string + native: true + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), (_col6 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col7 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col8 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col9 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col10 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col11 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col12 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col13 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), 
(_col14 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col15 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col16 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), (_col17 / CAST( _col1 AS decimal(10,0))) (type: decimal(38,12)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + selectExpressions: DecimalColDivideDecimalColumn(col 6:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 31:decimal(38,12), DecimalColDivideDecimalColumn(col 7:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 32:decimal(38,12), DecimalColDivideDecimalColumn(col 8:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 33:decimal(38,12), DecimalColDivideDecimalColumn(col 9:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 34:decimal(38,12), DecimalColDivideDecimalColumn(col 10:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 35:decimal(38,12), DecimalColDivideDecimalColumn(col 11:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 36:decimal(38,12), DecimalColDivideDecimalColumn(col 12:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 37:decimal(38,12), DecimalColDivideDecimalColumn(col 13:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 38:decimal(38,12), DecimalColDivideDecimalColumn(col 14:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 39:decimal(38,12), DecimalColDivideDecimalColumn(col 15:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 40:decimal(38,12), DecimalColDivideDecimalColumn(col 16:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 41:decimal(38,12), DecimalColDivideDecimalColumn(col 17:decimal(28,2), col 30:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 30:decimal(10,0)) -> 42:decimal(38,12) + Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: sum(_col6), sum(_col7), sum(_col8), sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14), sum(_col15), sum(_col16), sum(_col17), sum(_col18), sum(_col19), sum(_col20), sum(_col21), sum(_col22), sum(_col23), sum(_col24), sum(_col25), sum(_col26), sum(_col27), sum(_col28), sum(_col29), 
sum(_col30), sum(_col31), sum(_col32), sum(_col33), sum(_col34), sum(_col35), sum(_col36), sum(_col37), sum(_col38), sum(_col39), sum(_col40), sum(_col41) Group By Vectorization: diff --git ql/src/test/results/clientpositive/perf/tez/query69.q.out ql/src/test/results/clientpositive/perf/tez/query69.q.out index a5a56e3844..0113821525 100644 --- ql/src/test/results/clientpositive/perf/tez/query69.q.out +++ ql/src/test/results/clientpositive/perf/tez/query69.q.out @@ -683,15 +683,15 @@ STAGE PLANS: Filter Operator predicate: _col14 is null (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) - outputColumnNames: _col6, _col7, _col8, _col9, _col10 + Top N Key Operator + sort order: +++++ + keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++++ - keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) + top n: 100 + Select Operator + expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) + outputColumnNames: _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - top n: 100 Group By Operator aggregations: count() keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) diff --git ql/src/test/results/clientpositive/perf/tez/query7.q.out ql/src/test/results/clientpositive/perf/tez/query7.q.out index 7b2b595c1b..88ba741be0 100644 --- ql/src/test/results/clientpositive/perf/tez/query7.q.out +++ ql/src/test/results/clientpositive/perf/tez/query7.q.out @@ -191,52 +191,60 @@ STAGE PLANS: predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Top N Key Operator + sort order: + + keys: i_item_id (type: string) + top n: 100 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 1:string native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: 
VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 0:int) -> int, VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFBloomFilter(col 0:int) -> binary - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0:int) -> int, VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFBloomFilter(col 0:int) -> binary + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1, 2] + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map Vectorization: enabled: true diff --git ql/src/test/results/clientpositive/perf/tez/query99.q.out ql/src/test/results/clientpositive/perf/tez/query99.q.out index 3cedff6cde..cb89477758 100644 --- ql/src/test/results/clientpositive/perf/tez/query99.q.out +++ ql/src/test/results/clientpositive/perf/tez/query99.q.out @@ -167,31 +167,31 @@ STAGE PLANS: 1 Map 7 
Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE HybridGraceHashJoin: true - Select Operator - expressions: substr(_col10, 1, 20) (type: string), _col12 (type: string), _col8 (type: string), CASE WHEN (((_col1 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 30) and ((_col1 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 60) and ((_col1 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 90) and ((_col1 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col1 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [38, 37, 35, 39, 41, 42, 43, 40] - selectExpressions: StringSubstrColStartLen(col 36:string, start 0, length 20) -> 38:string, IfExprLongScalarLongScalar(col 40:boolean, val 1, val 0)(children: LongColLessEqualLongScalar(col 39:int, val 30)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 39:int) -> 40:boolean) -> 39:int, IfExprLongScalarLongScalar(col 40:boolean, val 1, val 0)(children: ColAndCol(col 41:boolean, col 42:boolean)(children: LongColGreaterLongScalar(col 40:int, val 30)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 40:int) -> 41:boolean, LongColLessEqualLongScalar(col 40:int, val 60)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 40:int) -> 42:boolean) -> 40:boolean) -> 41:int, IfExprLongScalarLongScalar(col 40:boolean, val 1, val 0)(children: ColAndCol(col 42:boolean, col 43:boolean)(children: LongColGreaterLongScalar(col 40:int, val 60)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 40:int) -> 42:boolean, LongColLessEqualLongScalar(col 40:int, val 90)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 40:int) -> 43:boolean) -> 40:boolean) -> 42:int, IfExprLongScalarLongScalar(col 40:boolean, val 1, val 0)(children: ColAndCol(col 43:boolean, col 44:boolean)(children: LongColGreaterLongScalar(col 40:int, val 90)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 40:int) -> 43:boolean, LongColLessEqualLongScalar(col 40:int, val 120)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 40:int) -> 44:boolean) -> 40:boolean) -> 43:int, IfExprLongScalarLongScalar(col 44:boolean, val 1, val 0)(children: LongColGreaterLongScalar(col 40:int, val 120)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 40:int) -> 44:boolean) -> 40:int + Top N Key Operator + sort order: +++ + keys: substr(_col10, 1, 20) (type: string), _col12 (type: string), _col8 (type: string) Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Top N Key Operator - sort order: +++ - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - top n: 100 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 38:string, col 37:string, col 35:string + top n: 100 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: StringSubstrColStartLen(col 36:string, start 0, length 20) -> 38:string, col 37:string, col 35:string + native: true + Select Operator + expressions: substr(_col10, 1, 20) (type: string), _col12 (type: string), _col8 (type: string), CASE WHEN (((_col1 - _col0) <= 30)) THEN 
(1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 30) and ((_col1 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 60) and ((_col1 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 90) and ((_col1 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col1 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [39, 37, 35, 40, 42, 43, 44, 41] + selectExpressions: StringSubstrColStartLen(col 36:string, start 0, length 20) -> 39:string, IfExprLongScalarLongScalar(col 41:boolean, val 1, val 0)(children: LongColLessEqualLongScalar(col 40:int, val 30)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 40:int) -> 41:boolean) -> 40:int, IfExprLongScalarLongScalar(col 41:boolean, val 1, val 0)(children: ColAndCol(col 42:boolean, col 43:boolean)(children: LongColGreaterLongScalar(col 41:int, val 30)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 41:int) -> 42:boolean, LongColLessEqualLongScalar(col 41:int, val 60)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 41:int) -> 43:boolean) -> 41:boolean) -> 42:int, IfExprLongScalarLongScalar(col 41:boolean, val 1, val 0)(children: ColAndCol(col 43:boolean, col 44:boolean)(children: LongColGreaterLongScalar(col 41:int, val 60)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 41:int) -> 43:boolean, LongColLessEqualLongScalar(col 41:int, val 90)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 41:int) -> 44:boolean) -> 41:boolean) -> 43:int, IfExprLongScalarLongScalar(col 41:boolean, val 1, val 0)(children: ColAndCol(col 44:boolean, col 45:boolean)(children: LongColGreaterLongScalar(col 41:int, val 90)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 41:int) -> 44:boolean, LongColLessEqualLongScalar(col 41:int, val 120)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 41:int) -> 45:boolean) -> 41:boolean) -> 44:int, IfExprLongScalarLongScalar(col 45:boolean, val 1, val 0)(children: LongColGreaterLongScalar(col 41:int, val 120)(children: LongColSubtractLongColumn(col 2:int, col 0:int) -> 41:int) -> 45:boolean) -> 41:int + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 39:int) -> bigint, VectorUDAFSumLong(col 41:int) -> bigint, VectorUDAFSumLong(col 42:int) -> bigint, VectorUDAFSumLong(col 43:int) -> bigint, VectorUDAFSumLong(col 40:int) -> bigint + aggregators: VectorUDAFSumLong(col 40:int) -> bigint, VectorUDAFSumLong(col 42:int) -> bigint, VectorUDAFSumLong(col 43:int) -> bigint, VectorUDAFSumLong(col 44:int) -> bigint, VectorUDAFSumLong(col 41:int) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 38:string, col 37:string, col 35:string + keyExpressions: col 39:string, col 37:string, col 35:string native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] diff --git ql/src/test/results/clientpositive/tez/topnkey.q.out ql/src/test/results/clientpositive/tez/topnkey.q.out index 66b9191a48..0496c601f7 100644 --- ql/src/test/results/clientpositive/tez/topnkey.q.out +++ ql/src/test/results/clientpositive/tez/topnkey.q.out @@ -29,10 +29,10 @@ Stage-0 
PartitionCols:_col0 Group By Operator [GBY_3] (rows=250 width=95) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_11] (rows=500 width=178) - keys:_col0,sort order:+,top n:5 - Select Operator [SEL_1] (rows=500 width=178) - Output:["_col0","_col1"] + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_11] (rows=500 width=178) + keys:key,sort order:+,top n:5 TableScan [TS_0] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -80,10 +80,10 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_2] (rows=250 width=87) Output:["_col0"],keys:key - Top N Key Operator [TNK_10] (rows=500 width=87) - keys:key,sort order:+,top n:5 - Select Operator [SEL_1] (rows=500 width=87) - Output:["key"] + Select Operator [SEL_1] (rows=500 width=87) + Output:["key"] + Top N Key Operator [TNK_10] (rows=500 width=87) + keys:key,sort order:+,top n:5 TableScan [TS_0] (rows=500 width=87) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] @@ -100,63 +100,331 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 100 103 104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:5 Stage-1 - Reducer 3 - File Output Operator [FS_13] - Limit [LIM_12] (rows=5 width=178) + Reducer 4 + File Output Operator [FS_17] + Limit [LIM_16] (rows=5 width=178) Number of rows:5 - Select Operator [SEL_11] (rows=791 width=178) + Select Operator [SEL_15] (rows=395 width=178) Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_10] - Select Operator [SEL_9] (rows=791 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_28] (rows=791 width=178) - Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_6] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=500 width=87) - Output:["_col0"] - Filter Operator [FIL_16] (rows=500 width=87) - predicate:key is not null - TableScan [TS_0] (rows=500 width=87) - default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] - SHUFFLE [RS_7] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=500 width=178) - predicate:key is not null - TableScan [TS_3] (rows=500 width=178) - default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_14] + Group By Operator [GBY_12] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Group By Operator [GBY_10] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_22] (rows=791 width=178) + keys:_col0, _col2,sort order:++,top n:5 + Merge Join Operator [MERGEJOIN_33] (rows=791 width=178) + Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500 width=87) + Output:["_col0"] + Top N Key Operator [TNK_34] + keys:key,sort order:+,top n:5 + Filter Operator [FIL_20] (rows=500 width=87) + predicate:key is not null + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_35] + keys:key, value,sort order:++,top n:5 + Filter Operator [FIL_21] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, 
src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_15] + Limit [LIM_14] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_13] (rows=395 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_12] + Group By Operator [GBY_10] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_18] (rows=791 width=178) + keys:_col0, _col2,sort order:++,top n:5 + Merge Join Operator [MERGEJOIN_24] (rows=791 width=178) + Conds:RS_4._col0=RS_5._col0(Left Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=87) + Output:["_col0"] + Top N Key Operator [TNK_25] + keys:key,sort order:+,top n:5 + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_5] + PartitionCols:_col0 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_15] + Limit [LIM_14] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_13] (rows=395 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_12] + Group By Operator [GBY_10] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Merge Join Operator [MERGEJOIN_24] (rows=791 width=178) + Conds:RS_4._col0=RS_5._col0(Right Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_5] + PartitionCols:_col0 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_25] + keys:key, value,sort order:++,top n:5 + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_15] + Limit [LIM_14] (rows=5 width=142) + Number of rows:5 + Select Operator [SEL_13] (rows=500 width=103) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_12] + Group By Operator [GBY_10] (rows=500 width=103) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=500 width=103) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_16] (rows=1000 width=140) + keys:_col0, _col2,sort order:++,top n:5 + Merge Join Operator [MERGEJOIN_17] (rows=1000 width=140) + Conds:RS_4._col0=RS_5._col0(Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_5] + PartitionCols:_col0 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_15] + Limit [LIM_14] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_13] (rows=500 width=140) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_12] + Group By Operator [GBY_10] (rows=500 width=140) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=500 width=140) + Output:["_col0","_col1"],keys:_col0, _col2 + Merge Join Operator [MERGEJOIN_16] (rows=1000 width=140) + Conds:RS_4._col0=RS_5._col0(Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_5] + PartitionCols:_col0 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 diff --git ql/src/test/results/clientpositive/tez/vector_topnkey.q.out ql/src/test/results/clientpositive/tez/vector_topnkey.q.out index d6f7cc2940..ae4788de22 100644 --- ql/src/test/results/clientpositive/tez/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/tez/vector_topnkey.q.out @@ -1,7 +1,7 @@ -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY Plan optimized by CBO. 
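The compact plan lines in these topnkey/vector_topnkey hunks encode each key column's direction in a sort-order string: one character per key, '+' for ascending and '-' for descending (e.g. keys:key,sort order:+,top n:5 above, or sort order: ++++++ for six ascending keys in the query66 hunks). As a reader aid, here is a minimal Java sketch of decoding that convention into a composite row comparator; SortOrderDemo and orderedComparator are invented names for illustration, not Hive API.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

/** Hypothetical demo of the '+'/'-' sort-order convention (not Hive code). */
public class SortOrderDemo {

    // Build a composite comparator from a string such as "+-", where the
    // i-th character gives the direction of the i-th key column.
    static Comparator<List<Integer>> orderedComparator(String sortOrder) {
        Comparator<List<Integer>> cmp = null;
        for (int i = 0; i < sortOrder.length(); i++) {
            final int col = i;
            Comparator<List<Integer>> byCol = Comparator.comparing(row -> row.get(col));
            if (sortOrder.charAt(i) == '-') {
                byCol = byCol.reversed(); // '-' marks a descending key column
            }
            cmp = (cmp == null) ? byCol : cmp.thenComparing(byCol);
        }
        return cmp;
    }

    public static void main(String[] args) {
        List<List<Integer>> rows = new ArrayList<>(Arrays.asList(
                Arrays.asList(1, 2), Arrays.asList(1, 9), Arrays.asList(0, 5)));
        rows.sort(orderedComparator("+-")); // col 0 ascending, col 1 descending
        System.out.println(rows);           // [[0, 5], [1, 9], [1, 2]]
    }
}

A direction-aware comparator of this shape is exactly what a top-n key filter needs in order to rank candidate keys against the requested ORDER BY.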
@@ -29,10 +29,10 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_14] (rows=250 width=95) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_13] (rows=500 width=178) - keys:_col0,sort order:+,top n:5 - Select Operator [SEL_12] (rows=500 width=178) - Output:["_col0","_col1"] + Select Operator [SEL_13] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_12] (rows=500 width=178) + keys:key,sort order:+,top n:5 TableScan [TS_0] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -49,10 +49,10 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 100 200 103 206 104 208 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY Plan optimized by CBO. @@ -80,10 +80,10 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_13] (rows=250 width=87) Output:["_col0"],keys:key - Top N Key Operator [TNK_12] (rows=500 width=87) - keys:key,sort order:+,top n:5 - Select Operator [SEL_11] (rows=500 width=87) - Output:["key"] + Select Operator [SEL_12] (rows=500 width=87) + Output:["key"] + Top N Key Operator [TNK_11] (rows=500 width=87) + keys:key,sort order:+,top n:5 TableScan [TS_0] (rows=500 width=87) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] @@ -100,63 +100,331 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 100 103 104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:5 Stage-1 - Reducer 3 vectorized - File Output Operator [FS_37] - Limit [LIM_36] (rows=5 width=178) + Reducer 4 vectorized + File Output Operator [FS_48] + Limit [LIM_47] (rows=5 width=178) Number of rows:5 - Select Operator [SEL_35] (rows=791 width=178) + Select Operator [SEL_46] (rows=395 width=178) Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_10] - Select Operator [SEL_9] (rows=791 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_28] (rows=791 width=178) - Conds:RS_31._col0=RS_34._col0(Inner),Output:["_col0","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_31] - PartitionCols:_col0 - Select Operator [SEL_30] (rows=500 width=87) - Output:["_col0"] - Filter Operator [FIL_29] (rows=500 width=87) - predicate:key is not null - TableScan [TS_0] (rows=500 width=87) - default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_34] - PartitionCols:_col0 - Select Operator [SEL_33] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_32] (rows=500 width=178) - predicate:key is not null - TableScan [TS_3] (rows=500 width=178) - default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_45] + Group By Operator [GBY_44] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Group By Operator [GBY_10] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_22] (rows=791 width=178) + keys:_col0, _col2,sort order:++,top n:5 + Merge Join Operator [MERGEJOIN_33] (rows=791 width=178) + Conds:RS_39._col0=RS_43._col0(Inner),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_38] (rows=500 width=87) + Output:["_col0"] + Top N Key Operator [TNK_37] + keys:key,sort order:+,top n:5 + Filter Operator [FIL_36] (rows=500 width=87) + predicate:key is not null + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_43] + PartitionCols:_col0 + Select Operator [SEL_42] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_41] + keys:key, value,sort order:++,top n:5 + Filter Operator [FIL_40] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 
+100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_35] + Limit [LIM_34] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_33] (rows=395 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_32] + Group By Operator [GBY_31] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_18] (rows=791 width=178) + keys:_col0, _col2,sort order:++,top n:5 + Merge Join Operator [MERGEJOIN_24] (rows=791 width=178) + Conds:RS_28._col0=RS_30._col0(Left Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_28] + PartitionCols:_col0 + Select Operator [SEL_27] (rows=500 width=87) + Output:["_col0"] + Top N Key Operator [TNK_26] + keys:key,sort order:+,top n:5 + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_29] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_35] + Limit [LIM_34] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_33] (rows=395 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_32] + Group By Operator [GBY_31] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Merge Join Operator [MERGEJOIN_24] (rows=791 width=178) + Conds:RS_27._col0=RS_30._col0(Right Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_29] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_28] + keys:key, value,sort order:++,top n:5 + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_26] + Limit [LIM_25] (rows=5 width=142) + Number of rows:5 + Select Operator [SEL_24] (rows=500 width=103) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_23] + Group By Operator [GBY_22] (rows=500 width=103) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=500 width=103) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_16] (rows=1000 width=140) + keys:_col0, _col2,sort order:++,top n:5 + Merge Join Operator [MERGEJOIN_17] (rows=1000 width=140) + Conds:RS_19._col0=RS_21._col0(Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_18] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_25] + Limit [LIM_24] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_23] (rows=500 width=140) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_22] + Group By Operator [GBY_21] (rows=500 width=140) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=500 width=140) + Output:["_col0","_col1"],keys:_col0, _col2 + Merge Join Operator [MERGEJOIN_16] (rows=1000 width=140) + Conds:RS_18._col0=RS_20._col0(Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_20] + PartitionCols:_col0 + Select Operator [SEL_19] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 diff --git ql/src/test/results/clientpositive/topnkey.q.out ql/src/test/results/clientpositive/topnkey.q.out index 31f3a70920..0aa910a369 100644 --- ql/src/test/results/clientpositive/topnkey.q.out +++ ql/src/test/results/clientpositive/topnkey.q.out @@ -182,20 +182,17 @@ POSTHOOK: Input: default@src 100 103 104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -243,10 +240,11 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) + Group By Operator + keys: _col0 (type: 
string), _col2 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -255,20 +253,43 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE @@ -286,16 +307,406 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + 
TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 
70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) 
+ outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 140976 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked 
pattern was here #### 0 val_0 -0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 diff --git ql/src/test/results/clientpositive/vector_topnkey.q.out ql/src/test/results/clientpositive/vector_topnkey.q.out index ed829e2e7c..0c8e1da3c0 100644 --- ql/src/test/results/clientpositive/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/vector_topnkey.q.out @@ -1,7 +1,7 @@ -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -20,28 +20,12 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4] - selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 4:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -50,11 +34,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) @@ -68,12 +47,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [string, bigint] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -96,17 +69,9 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:string, 1:_col1:bigint] Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 250 Data size: 23750 
Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) @@ -120,12 +85,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: _col0:string, _col1:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -165,10 +124,10 @@ POSTHOOK: Input: default@src 100 200 103 206 104 208 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -187,25 +146,11 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: key - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: key (type: string) mode: hash outputColumnNames: _col0 @@ -214,11 +159,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized @@ -231,12 +171,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -258,17 +192,9 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:string] Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized @@ -281,12 +207,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: 
- dataColumnCount: 1 - includeColumns: [0] - dataColumns: _col0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -326,11 +246,11 @@ POSTHOOK: Input: default@src 100 103 104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -339,7 +259,8 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -394,10 +315,11 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -409,18 +331,47 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:string, 1:_col1:string] + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Reduce 
Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: vectorized @@ -433,12 +384,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: _col0:string, _col1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -447,7 +392,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE @@ -465,16 +410,523 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + 
TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + 
          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col2
+          Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+          Group By Operator
+            keys: _col0 (type: string), _col2 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: string)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
 0	val_0
+10	val_10
+100	val_100
+103	val_103
+104	val_104
+PREHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col1 (type: string)
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Outer Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col2
+          Statistics: Num rows: 1000 Data size: 140976 Basic stats: COMPLETE Column stats: COMPLETE
+          Group By Operator
+            keys: _col0 (type: string), _col2 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: string)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 70488 Basic stats: COMPLETE Column stats: COMPLETE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
 0	val_0
-0	val_0
+10	val_10
+100	val_100
+103	val_103
+104	val_104