diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4393a2825e..e94a4783fb 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2383,7 +2383,7 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "If the skew information is correctly stored in the metadata, hive.optimize.skewjoin.compiletime\n" +
         "would change the query plan to take care of it, and hive.optimize.skewjoin will be a no-op."),
-    HIVE_OPTIMIZE_TOPNKEY("hive.optimize.topnkey", true, "Whether to enable top n key optimizer."),
+    HIVE_OPTIMIZE_TOPNKEY("hive.optimize.topnkey", false, "Whether to enable top n key optimizer."),
     HIVE_SHARED_WORK_OPTIMIZATION("hive.optimize.shared.work", true,
         "Whether to enable shared work optimizer. The optimizer finds scan operator over the same table\n" +
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/CommonKeyPrefix.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/CommonKeyPrefix.java
new file mode 100644
index 0000000000..59d890a034
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/CommonKeyPrefix.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Stream;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
+
+/**
+ * Holds the common key prefix of the key columns of two operators.
+ * Provides methods for mapping TopNKey operator keys to GroupBy and ReduceSink operator keys.
+ */
+public class CommonKeyPrefix {
+
+  public static CommonKeyPrefix map(TopNKeyDesc topNKeyDesc, GroupByDesc groupByDesc) {
+    return map(topNKeyDesc.getKeyColumns(), topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder(),
+        groupByDesc.getKeys(), groupByDesc.getColumnExprMap(),
+        topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder());
+  }
+
+  public static CommonKeyPrefix map(TopNKeyDesc topNKeyDesc, ReduceSinkDesc reduceSinkDesc) {
+    return map(topNKeyDesc.getKeyColumns(), topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder(),
+        reduceSinkDesc.getKeyCols(), reduceSinkDesc.getColumnExprMap(),
+        reduceSinkDesc.getOrder(), reduceSinkDesc.getNullOrder());
+  }
+
+  public static CommonKeyPrefix map(
+      List<ExprNodeDesc> opKeys, String opOrder, String opNullOrder,
+      List<ExprNodeDesc> parentKeys,
+      String parentOrder, String parentNullOrder) {
+
+    CommonKeyPrefix commonPrefix = new CommonKeyPrefix();
+    int size = Stream.of(opKeys.size(), opOrder.length(), opNullOrder.length(),
+        parentKeys.size(), parentOrder.length(), parentNullOrder.length())
+        .min(Integer::compareTo)
+        .orElse(0);
+
+    for (int i = 0; i < size; ++i) {
+      ExprNodeDesc opKey = opKeys.get(i);
+      ExprNodeDesc parentKey = parentKeys.get(i);
+      if (opKey.isSame(parentKey) &&
+          opOrder.charAt(i) == parentOrder.charAt(i) &&
+          opNullOrder.charAt(i) == parentNullOrder.charAt(i)) {
+        commonPrefix.add(parentKey, opOrder.charAt(i), opNullOrder.charAt(i));
+      } else {
+        return commonPrefix;
+      }
+    }
+    return commonPrefix;
+  }
+
+  public static CommonKeyPrefix map(
+      List<ExprNodeDesc> opKeys, String opOrder, String opNullOrder,
+      List<ExprNodeDesc> parentKeys, Map<String, ExprNodeDesc> parentColExprMap,
+      String parentOrder, String parentNullOrder) {
+
+    if (parentColExprMap == null) {
+      return map(opKeys, opOrder, opNullOrder, parentKeys, parentOrder, parentNullOrder);
+    }
+
+    CommonKeyPrefix commonPrefix = new CommonKeyPrefix();
+    int size = Stream.of(opKeys.size(), opOrder.length(), opNullOrder.length(),
+        parentKeys.size(), parentColExprMap.size(), parentOrder.length(), parentNullOrder.length())
+        .min(Integer::compareTo)
+        .orElse(0);
+
+    for (int i = 0; i < size; ++i) {
+      ExprNodeDesc column = opKeys.get(i);
+      ExprNodeDesc parentKey = parentKeys.get(i);
+      String columnName = column.getExprString();
+      if (Objects.equals(parentColExprMap.get(columnName), parentKey) &&
+          opOrder.charAt(i) == parentOrder.charAt(i) &&
+          opNullOrder.charAt(i) == parentNullOrder.charAt(i)) {
+        commonPrefix.add(parentKey, opOrder.charAt(i), opNullOrder.charAt(i));
+      } else {
+        return commonPrefix;
+      }
+    }
+    return commonPrefix;
+  }
+
+  private List<ExprNodeDesc> mappedColumns = new ArrayList<>();
+  private StringBuilder mappedOrder = new StringBuilder();
+  private StringBuilder mappedNullOrder = new StringBuilder();
+
+  public void add(ExprNodeDesc column, char order, char nullOrder) {
+    mappedColumns.add(column);
+    mappedOrder.append(order);
+    mappedNullOrder.append(nullOrder);
+  }
+
+  public boolean isEmpty() {
+    return mappedColumns.isEmpty();
+  }
+
+  public List<ExprNodeDesc> getMappedColumns() {
+    return mappedColumns;
+  }
+
+  public String getMappedOrder() {
+    return mappedOrder.toString();
+  }
+
+  public String getMappedNullOrder() {
+    return mappedNullOrder.toString();
+  }
+
+  public int size() {
+    return mappedColumns.size();
+  }
+}
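For a concrete picture of how this class is consumed, here is a minimal sketch (illustrative only, not part of the patch; topNKeyDesc and reduceSinkDesc are assumed to be the descriptors of a TopNKey operator and its ReduceSink parent):

// Sketch: typical use of CommonKeyPrefix, mirroring the pushdown code later in
// this patch. All variables are assumed to be in scope.
CommonKeyPrefix prefix = CommonKeyPrefix.map(topNKeyDesc, reduceSinkDesc);
if (!prefix.isEmpty()) {
  // A TopNKey descriptor restricted to the matched prefix can be evaluated
  // below the parent, because filtering on a shared key prefix preserves
  // every row that can reach the original top-n boundary.
  TopNKeyDesc pushedDesc = new TopNKeyDesc(topNKeyDesc.getTopN(),
      prefix.getMappedOrder(), prefix.getMappedNullOrder(), prefix.getMappedColumns());
  // When every TopNKey key was mapped, the original operator is redundant:
  boolean fullMatch = prefix.size() == topNKeyDesc.getKeyColumns().size();
}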
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java
index 4b4cf99503..f483d591c5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.ql.optimizer;

-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
@@ -27,9 +26,6 @@
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
-import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
@@ -37,6 +33,7 @@
 import org.slf4j.LoggerFactory;

 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Stack;

@@ -59,51 +56,39 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
     ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) nd;
     ReduceSinkDesc reduceSinkDesc = reduceSinkOperator.getConf();

-    // Get GroupByOperator
-    GroupByOperator groupByOperator = (GroupByOperator) reduceSinkOperator.getParentOperators().get(0);
-    GroupByDesc groupByDesc = groupByOperator.getConf();
-
     // Check whether the reduce sink operator contains top n
-    if (!reduceSinkDesc.isOrdering() || reduceSinkDesc.getTopN() < 0) {
-      return null;
-    }
-
-    // Check whether the group by operator is in hash mode
-    if (groupByDesc.getMode() != GroupByDesc.Mode.HASH) {
-      return null;
-    }
-
-    // Check whether the group by operator has distinct aggregations
-    if (groupByDesc.isDistinct()) {
-      return null;
-    }
-
-    // Check whether RS keys are same as GBY keys
-    List<ExprNodeDesc> groupByKeyColumns = groupByDesc.getKeys();
-    List<ExprNodeDesc> mappedColumns = new ArrayList<>();
-    for (ExprNodeDesc columns : reduceSinkDesc.getKeyCols()) {
-      mappedColumns.add(groupByDesc.getColumnExprMap().get(columns.getExprString()));
-    }
-    if (!ExprNodeDescUtils.isSame(mappedColumns, groupByKeyColumns)) {
+    if (reduceSinkDesc.getTopN() < 0 || !reduceSinkDesc.isOrdering()) {
       return null;
     }

     // Check whether there already is a top n key operator
-    Operator<? extends OperatorDesc> parentOperator = groupByOperator.getParentOperators().get(0);
+    Operator<? extends OperatorDesc> parentOperator = reduceSinkOperator.getParentOperators().get(0);
     if (parentOperator instanceof TopNKeyOperator) {
       return null;
     }

-    // Insert a new top n key operator between the group by operator and its parent
-    TopNKeyDesc topNKeyDesc = new TopNKeyDesc(
-        reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(), reduceSinkDesc.getNullOrder(), groupByKeyColumns);
-    Operator<? extends OperatorDesc> newOperator = OperatorFactory.getAndMakeChild(
-        groupByOperator.getCompilationOpContext(), (OperatorDesc) topNKeyDesc,
-        new RowSchema(groupByOperator.getSchema()), groupByOperator.getParentOperators());
-    newOperator.getChildOperators().add(groupByOperator);
-    groupByOperator.getParentOperators().add(newOperator);
-    parentOperator.removeChild(groupByOperator);
+    TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(),
+        reduceSinkDesc.getNullOrder(), reduceSinkDesc.getKeyCols());
+    copyDown(reduceSinkOperator, topNKeyDesc);
     return null;
   }
+
+  static TopNKeyOperator copyDown(Operator<? extends OperatorDesc> child, OperatorDesc operatorDesc) {
+    final List<Operator<? extends OperatorDesc>> parents = child.getParentOperators();
+
+    final Operator<? extends OperatorDesc> newOperator =
+        OperatorFactory.getAndMakeChild(
+            child.getCompilationOpContext(), operatorDesc,
+            new RowSchema(parents.get(0).getSchema()), child.getParentOperators());
+    newOperator.setParentOperators(new ArrayList<>(parents));
+    newOperator.setChildOperators(new ArrayList<>(Collections.singletonList(child)));
+
+    for (Operator<? extends OperatorDesc> parent : parents) {
+      parent.removeChild(child);
+    }
+    child.setParentOperators(new ArrayList<>(Collections.singletonList(newOperator)));
+
+    return (TopNKeyOperator) newOperator;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdownProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdownProcessor.java
new file mode 100644
index 0000000000..8af61e38bc
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdownProcessor.java
@@ -0,0 +1,318 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TopNKeyOperator;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import static org.apache.hadoop.hive.ql.optimizer.TopNKeyProcessor.copyDown;
+
+/**
+ * Implementation of TopNKey operator pushdown.
+ */
+public class TopNKeyPushdownProcessor implements NodeProcessor {
+  private static final Logger LOG = LoggerFactory.getLogger(TopNKeyPushdownProcessor.class);
+
+  @Override
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+      Object... nodeOutputs) throws SemanticException {
+    pushdown((TopNKeyOperator) nd);
+    return null;
+  }
+
+  private void pushdown(TopNKeyOperator topNKey) throws SemanticException {
+
+    final Operator<? extends OperatorDesc> parent =
+        topNKey.getParentOperators().get(0);
+
+    switch (parent.getType()) {
+      case SELECT:
+        pushdownThroughSelect(topNKey);
+        break;
+
+      case FORWARD:
+        LOG.debug("Pushing {} through {}", topNKey.getName(), parent.getName());
+        moveDown(topNKey);
+        pushdown(topNKey);
+        break;
+
+      case GROUPBY:
+        pushdownThroughGroupBy(topNKey);
+        break;
+
+      case REDUCESINK:
+        pushdownThroughReduceSink(topNKey);
+        break;
+
+      case MERGEJOIN:
+      case JOIN:
+        pushdownThroughJoin(topNKey, (CommonJoinOperator<? extends JoinDesc>) parent);
+        break;
+
+      case TOPNKEY:
+        pushdownThroughTopNKey(topNKey, (TopNKeyOperator) parent);
+        break;
+
+      default:
+        break;
+    }
+  }
+
+  /**
+   * Push through Project if the TopNKey expressions can be mapped to expressions based on the
+   * Project input.
+   *
+   * @param topNKey TopNKey operator to push
+   * @throws SemanticException when removeChildAndAdoptItsChildren was not successful in the
+   *           recursive pushdown call
+   */
+  private void pushdownThroughSelect(TopNKeyOperator topNKey) throws SemanticException {
+
+    final SelectOperator select = (SelectOperator) topNKey.getParentOperators().get(0);
+    final TopNKeyDesc topNKeyDesc = topNKey.getConf();
+
+    final List<ExprNodeDesc> mappedColumns = mapColumns(topNKeyDesc.getKeyColumns(), select.getColumnExprMap());
+    if (mappedColumns.size() != topNKeyDesc.getKeyColumns().size()) {
+      return;
+    }
+
+    LOG.debug("Pushing {} through {}", topNKey.getName(), select.getName());
+    topNKeyDesc.setKeyColumns(mappedColumns);
+    moveDown(topNKey);
+    pushdown(topNKey);
+  }
+
+  private static List<ExprNodeDesc> mapColumns(List<ExprNodeDesc> columns,
+      Map<String, ExprNodeDesc> colExprMap) {
+
+    if (colExprMap == null) {
+      return new ArrayList<>(0);
+    }
+    final List<ExprNodeDesc> mappedColumns = new ArrayList<>();
+    for (ExprNodeDesc column : columns) {
+      final String columnName = column.getExprString();
+      if (colExprMap.containsKey(columnName)) {
+        mappedColumns.add(colExprMap.get(columnName));
+      }
+    }
+    return mappedColumns;
+  }
+
+  /**
+   * Push through GroupBy. No grouping sets. If the TopNKey expressions are the same as the
+   * GroupBy expressions, we can push it down and remove it from above the GroupBy. If the
+   * TopNKey expressions share a common prefix with the GroupBy expressions, the TopNKey can be
+   * pushed through the GroupBy using that prefix and kept above it.
+   *
+   * @param topNKey TopNKey operator to push
+   * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
+   */
+  private void pushdownThroughGroupBy(TopNKeyOperator topNKey) throws SemanticException {
+    final GroupByOperator groupBy = (GroupByOperator) topNKey.getParentOperators().get(0);
+    final GroupByDesc groupByDesc = groupBy.getConf();
+    final TopNKeyDesc topNKeyDesc = topNKey.getConf();
+
+    // Check grouping sets
+    if (groupByDesc.isGroupingSetsPresent()) {
+      return;
+    }
+
+    CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc, groupByDesc);
+    if (commonKeyPrefix.isEmpty()) {
+      return;
+    }
+
+    LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), groupBy.getName());
+    final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), commonKeyPrefix.getMappedOrder(),
+        commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns());
+    pushdown(copyDown(groupBy, newTopNKeyDesc));
+
+    if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
+      LOG.debug("Removing {} above {}", topNKey.getName(), groupBy.getName());
+      groupBy.removeChildAndAdoptItsChildren(topNKey);
+    }
+  }
+
+  /**
+   * Push through ReduceSink. If the TopNKey expressions and sort order are the same as the
+   * ReduceSink's, we can push it down and remove it from above the ReduceSink. If the TopNKey
+   * expressions share a common prefix with the ReduceSink keys, including the sort order, the
+   * TopNKey can be pushed through the ReduceSink using that prefix and kept above it.
+   *
+   * @param topNKey TopNKey operator to push
+   * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
+   */
+  private void pushdownThroughReduceSink(TopNKeyOperator topNKey) throws SemanticException {
+    ReduceSinkOperator reduceSink = (ReduceSinkOperator) topNKey.getParentOperators().get(0);
+    final ReduceSinkDesc reduceSinkDesc = reduceSink.getConf();
+    final TopNKeyDesc topNKeyDesc = topNKey.getConf();
+
+    CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc, reduceSinkDesc);
+    if (commonKeyPrefix.isEmpty()) {
+      return;
+    }
+
+    LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), reduceSink.getName());
+    final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(),
+        commonKeyPrefix.getMappedOrder(), commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns());
+    pushdown(copyDown(reduceSink, newTopNKeyDesc));
+
+    if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
+      LOG.debug("Removing {} above {}", topNKey.getName(), reduceSink.getName());
+      reduceSink.removeChildAndAdoptItsChildren(topNKey);
+    }
+  }
+
+  private void pushdownThroughJoin(TopNKeyOperator topNKey, CommonJoinOperator<? extends JoinDesc> parent)
+      throws SemanticException {
+    final JoinCondDesc[] joinConds = parent.getConf().getConds();
+    final JoinCondDesc firstJoinCond = joinConds[0];
+    for (JoinCondDesc joinCond : joinConds) {
+      if (!firstJoinCond.equals(joinCond)) {
+        return;
+      }
+    }
+    if (firstJoinCond.getType() == JoinDesc.LEFT_OUTER_JOIN) {
+      pushdownThroughLeftOuterJoin(topNKey);
+    }
+  }
+
+  /**
+   * Push through LOJ. If the TopNKey expressions refer only to expressions from the left input,
+   * push it with the expressions rewritten and remove it from the top of the LOJ. If a prefix of
+   * the TopNKey expressions refers to expressions from the left input, push it with those
+   * expressions rewritten and keep it on top of the LOJ.
+   *
+   * @param topNKey TopNKey operator to push
+   * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
+   */
+  private void pushdownThroughLeftOuterJoin(TopNKeyOperator topNKey) throws SemanticException {
+    final TopNKeyDesc topNKeyDesc = topNKey.getConf();
+    final CommonJoinOperator<? extends JoinDesc> join =
+        (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0);
+    final List<Operator<? extends OperatorDesc>> joinInputs = join.getParentOperators();
+    final ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) joinInputs.get(0);
+    final ReduceSinkDesc reduceSinkDesc = reduceSinkOperator.getConf();
+
+    CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(
+        mapUntilColumnEquals(topNKeyDesc.getKeyColumns(), join.getColumnExprMap()),
+        topNKeyDesc.getColumnSortOrder(),
+        topNKeyDesc.getNullOrder(),
+        reduceSinkDesc.getKeyCols(),
+        reduceSinkDesc.getColumnExprMap(),
+        reduceSinkDesc.getOrder(),
+        reduceSinkDesc.getNullOrder());
+    if (commonKeyPrefix.isEmpty()) {
+      return;
+    }
+
+    LOG.debug("Pushing a copy of {} through {} and {}",
+        topNKey.getName(), join.getName(), reduceSinkOperator.getName());
+    final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(),
+        commonKeyPrefix.getMappedOrder(), commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns());
+    pushdown(copyDown(reduceSinkOperator, newTopNKeyDesc));
+
+    if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
+      LOG.debug("Removing {} above {}", topNKey.getName(), join.getName());
+      join.removeChildAndAdoptItsChildren(topNKey);
+    }
+  }
+
+  private void pushdownThroughTopNKey(TopNKeyOperator topNKey, TopNKeyOperator parent) throws SemanticException {
+    if (hasSameTopNKeyDesc(parent, topNKey.getConf())) {
+      LOG.debug("Removing {} above same operator: {}", topNKey.getName(), parent.getName());
+      parent.removeChildAndAdoptItsChildren(topNKey);
+      return;
+    }
+
+    TopNKeyDesc topNKeyDesc = topNKey.getConf();
+    TopNKeyDesc parentTopNKeyDesc = parent.getConf();
+    CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(
+        topNKeyDesc.getKeyColumns(), topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder(),
+        parentTopNKeyDesc.getKeyColumns(), parentTopNKeyDesc.getColumnSortOrder(),
+        parentTopNKeyDesc.getNullOrder());
+
+    if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
+      LOG.debug("Pushing {} through {}", topNKey.getName(), parent.getName());
+      moveDown(topNKey);
+      pushdown(topNKey);
+    }
+  }
+
+  private static List<ExprNodeDesc> mapUntilColumnEquals(List<ExprNodeDesc> columns,
+      Map<String, ExprNodeDesc> colExprMap) {
+    if (colExprMap == null) {
+      return new ArrayList<>(0);
+    }
+    final List<ExprNodeDesc> mappedColumns = new ArrayList<>();
+    for (ExprNodeDesc column : columns) {
+      final String columnName = column.getExprString();
+      if (colExprMap.containsKey(columnName)) {
+        mappedColumns.add(colExprMap.get(columnName));
+      } else {
+        return mappedColumns;
+      }
+    }
+    return mappedColumns;
+  }
+
+  private static boolean hasSameTopNKeyDesc(Operator<? extends OperatorDesc> operator, TopNKeyDesc desc) {
+    if (!(operator instanceof TopNKeyOperator)) {
+      return false;
+    }
+
+    final TopNKeyOperator topNKey = (TopNKeyOperator) operator;
+    final TopNKeyDesc opDesc = topNKey.getConf();
+    return opDesc.isSame(desc);
+  }
+
+  private static void moveDown(TopNKeyOperator topNKey) throws SemanticException {
+
+    assert topNKey.getNumParent() == 1;
+    final Operator<? extends OperatorDesc> parent = topNKey.getParentOperators().get(0);
+    final List<Operator<? extends OperatorDesc>> grandParents = parent.getParentOperators();
+    parent.removeChildAndAdoptItsChildren(topNKey);
+    for (Operator<? extends OperatorDesc> grandParent : grandParents) {
+      grandParent.replaceChild(parent, topNKey);
+    }
+    topNKey.setParentOperators(new ArrayList<>(grandParents));
+    topNKey.setChildOperators(new ArrayList<>(Collections.singletonList(parent)));
+    parent.setParentOperators(new ArrayList<>(Collections.singletonList(topNKey)));
+  }
+}
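For intuition, here is the rewiring performed by the two DAG helpers, sketched as comments on small operator chains (illustrative only; P, G and C are invented operator names):

// copyDown(child, tnkDesc) inserts a new TopNKey below an operator:
//   before:  P1, P2 -> child            after:  P1, P2 -> TNK(tnkDesc) -> child
// The new TopNKey adopts all of child's parents, and child becomes its only child.
//
// moveDown(tnk) swaps a TopNKey with its single parent:
//   before:  G -> parent -> tnk -> C    after:  G -> tnk -> parent -> C
// It requires that tnk has exactly one parent, which the assert above enforces.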
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index bf58bd8bb8..b9e3cd3871 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -63,6 +63,7 @@
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TerminalOperator;
 import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator;
+import org.apache.hadoop.hive.ql.exec.TopNKeyOperator;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -96,6 +97,7 @@
 import org.apache.hadoop.hive.ql.optimizer.TopNKeyProcessor;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
 import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication;
+import org.apache.hadoop.hive.ql.optimizer.TopNKeyPushdownProcessor;
 import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication;
 import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits;
 import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer;
@@ -472,6 +474,12 @@ private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx,
           new SetHashGroupByMinReduction());
     }

+    if (procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_TOPNKEY)) {
+      opRules.put(
+          new RuleRegExp("Top n key pushdown", TopNKeyOperator.getOperatorName() + "%"),
+          new TopNKeyPushdownProcessor());
+    }
+
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
@@ -1287,8 +1295,7 @@ private static void runTopNKeyOptimization(OptimizeTezProcContext procCtx)
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     opRules.put(
-        new RuleRegExp("Top n key optimization", GroupByOperator.getOperatorName() + "%" +
-            ReduceSinkOperator.getOperatorName() + "%"),
+        new RuleRegExp("Top n key optimization", ReduceSinkOperator.getOperatorName() + "%"),
         new TopNKeyProcessor());

     // The dispatcher fires the processor corresponding to the closest matching
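As a sketch of how the new rule fires (based only on the dispatcher wiring visible in the hunks above; procCtx is assumed to be the OptimizeTezProcContext in scope):

// The "%" suffix makes the RuleRegExp match any operator whose name starts
// with the TopNKey operator name, so the processor runs once per
// TopNKeyOperator found while walking the plan.
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>();
opRules.put(
    new RuleRegExp("Top n key pushdown", TopNKeyOperator.getOperatorName() + "%"),
    new TopNKeyPushdownProcessor());
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);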
diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestCommonKeyPrefix.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestCommonKeyPrefix.java
new file mode 100644
index 0000000000..6df7df461b
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestCommonKeyPrefix.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.singletonList;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.junit.Test;
+
+/**
+ * Tests for CommonKeyPrefix.
+ */
+public class TestCommonKeyPrefix {
+  @Test
+  public void testMapWhenNoKeysExist() {
+    // when
+    CommonKeyPrefix commonPrefix = CommonKeyPrefix.map(
+        new ArrayList<>(0), "", "", new ArrayList<>(0), new HashMap<>(0), "", "");
+    // then
+    assertThat(commonPrefix.isEmpty(), is(true));
+    assertThat(commonPrefix.size(), is(0));
+    assertThat(commonPrefix.getMappedOrder(), is(""));
+    assertThat(commonPrefix.getMappedNullOrder(), is(""));
+    assertThat(commonPrefix.getMappedColumns().isEmpty(), is(true));
+  }
+
+  @Test
+  public void testMapWhenAllKeysMatch() {
+    // given
+    ExprNodeColumnDesc childCol0 = new ExprNodeColumnDesc();
+    childCol0.setColumn("_col0");
+    ExprNodeColumnDesc childCol1 = new ExprNodeColumnDesc();
+    childCol1.setColumn("_col1");
+    ExprNodeColumnDesc parentCol0 = new ExprNodeColumnDesc();
+    parentCol0.setColumn("KEY._col0");
+    ExprNodeColumnDesc parentCol1 = new ExprNodeColumnDesc();
+    parentCol1.setColumn("KEY._col1");
+    Map<String, ExprNodeDesc> exprNodeDescMap = new HashMap<>();
+    exprNodeDescMap.put("_col0", parentCol0);
+    exprNodeDescMap.put("_col1", parentCol1);
+
+    // when
+    CommonKeyPrefix commonPrefix = CommonKeyPrefix.map(
+        asList(childCol0, childCol1), "++", "aa", asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa");
+
+    // then
+    assertThat(commonPrefix.isEmpty(), is(false));
+    assertThat(commonPrefix.size(), is(2));
+    assertThat(commonPrefix.getMappedOrder(), is("++"));
+    assertThat(commonPrefix.getMappedNullOrder(), is("aa"));
+    assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0));
+    assertThat(commonPrefix.getMappedColumns().get(1), is(parentCol1));
+  }
+
+  @Test
+  public void testMapWhenOnlyFirstKeyOfTwoMatches() {
+    // given
+    ExprNodeColumnDesc childCol0 = new ExprNodeColumnDesc();
+    childCol0.setColumn("_col0");
+    ExprNodeColumnDesc differentChildCol = new ExprNodeColumnDesc();
+    differentChildCol.setColumn("_col2");
+    ExprNodeColumnDesc parentCol0 = new ExprNodeColumnDesc();
+    parentCol0.setColumn("KEY._col0");
+    ExprNodeColumnDesc parentCol1 = new ExprNodeColumnDesc();
+    parentCol1.setColumn("KEY._col1");
+    Map<String, ExprNodeDesc> exprNodeDescMap = new HashMap<>();
+    exprNodeDescMap.put("_col0", parentCol0);
+    exprNodeDescMap.put("_col1", parentCol1);
+
+    // when
+    CommonKeyPrefix commonPrefix = CommonKeyPrefix.map(
+        asList(childCol0, differentChildCol), "++", "aa",
+        asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa");
+
+    // then
+    assertThat(commonPrefix.isEmpty(), is(false));
+    assertThat(commonPrefix.size(), is(1));
+    assertThat(commonPrefix.getMappedOrder(), is("+"));
+    assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0));
+  }
+
+  @Test
+  public void testMapWhenAllColumnsMatchButOrderMismatch() {
+    // given
+    ExprNodeColumnDesc childCol0 = new ExprNodeColumnDesc();
+    childCol0.setColumn("_col0");
+    ExprNodeColumnDesc childCol1 = new ExprNodeColumnDesc();
+    childCol1.setColumn("_col1");
+    ExprNodeColumnDesc parentCol0 = new ExprNodeColumnDesc();
+    parentCol0.setColumn("KEY._col0");
+    ExprNodeColumnDesc parentCol1 = new ExprNodeColumnDesc();
+    parentCol1.setColumn("KEY._col1");
+    Map<String, ExprNodeDesc> exprNodeDescMap = new HashMap<>();
+    exprNodeDescMap.put("_col0", parentCol0);
+    exprNodeDescMap.put("_col1", parentCol1);
+
+    // when
+    CommonKeyPrefix commonPrefix = CommonKeyPrefix.map(
+        asList(childCol0, childCol1), "+-", "aa", asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa");
+
+    // then
+    assertThat(commonPrefix.isEmpty(), is(false));
+    assertThat(commonPrefix.size(), is(1));
+    assertThat(commonPrefix.getMappedOrder(), is("+"));
+    assertThat(commonPrefix.getMappedNullOrder(), is("a"));
+    assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0));
+
+    // when
+    commonPrefix = CommonKeyPrefix.map(
+        asList(childCol0, childCol1), "-+", "aa", asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa");
+
+    // then
+    assertThat(commonPrefix.isEmpty(), is(true));
+  }
+
+  @Test
+  public void testMapWhenAllColumnsMatchButNullOrderMismatch() {
+    // given
+    ExprNodeColumnDesc childCol0 = new ExprNodeColumnDesc();
+    childCol0.setColumn("_col0");
+    ExprNodeColumnDesc childCol1 = new ExprNodeColumnDesc();
+    childCol1.setColumn("_col1");
+    ExprNodeColumnDesc parentCol0 = new ExprNodeColumnDesc();
+    parentCol0.setColumn("KEY._col0");
+    ExprNodeColumnDesc parentCol1 = new ExprNodeColumnDesc();
+    parentCol1.setColumn("KEY._col1");
+    Map<String, ExprNodeDesc> exprNodeDescMap = new HashMap<>();
+    exprNodeDescMap.put("_col0", parentCol0);
+    exprNodeDescMap.put("_col1", parentCol1);
+
+    // when
+    CommonKeyPrefix commonPrefix = CommonKeyPrefix.map(
+        asList(childCol0, childCol1), "++", "az", asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa");
+
+    // then
+    assertThat(commonPrefix.isEmpty(), is(false));
+    assertThat(commonPrefix.size(), is(1));
+    assertThat(commonPrefix.getMappedOrder(), is("+"));
+    assertThat(commonPrefix.getMappedNullOrder(), is("a"));
+    assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0));
+
+    // when
+    commonPrefix = CommonKeyPrefix.map(
+        asList(childCol0, childCol1), "++", "za", asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa");
+
+    // then
+    assertThat(commonPrefix.isEmpty(), is(true));
+  }
+
+  @Test
+  public void testMapWhenKeyCountsMismatch() {
+    // given
+    ExprNodeColumnDesc childCol0 = new ExprNodeColumnDesc();
+    childCol0.setColumn("_col0");
+    ExprNodeColumnDesc childCol1 = new ExprNodeColumnDesc();
+    childCol1.setColumn("_col1");
+    ExprNodeColumnDesc parentCol0 = new ExprNodeColumnDesc();
+    parentCol0.setColumn("KEY._col0");
+    Map<String, ExprNodeDesc> exprNodeDescMap = new HashMap<>();
+    exprNodeDescMap.put("_col0", parentCol0);
+
+    // when
+    CommonKeyPrefix commonPrefix = CommonKeyPrefix.map(
+        asList(childCol0, childCol1), "++", "aa", singletonList(parentCol0), exprNodeDescMap, "++", "aa");
+
+    // then
+    assertThat(commonPrefix.isEmpty(), is(false));
+    assertThat(commonPrefix.size(), is(1));
+    assertThat(commonPrefix.getMappedOrder(), is("+"));
+    assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0));
+  }
+}
diff --git ql/src/test/queries/clientpositive/topnkey.q ql/src/test/queries/clientpositive/topnkey.q
index 057b6a45ba..6b53d6c6b1 100644
--- ql/src/test/queries/clientpositive/topnkey.q
+++ ql/src/test/queries/clientpositive/topnkey.q
@@ -1,30 +1,68 @@
 --! qt:dataset:src
-set hive.mapred.mode=nonstrict;
-set hive.vectorized.execution.enabled=false;
-set hive.optimize.topnkey=true;
-
-set hive.optimize.ppd=true;
-set hive.ppd.remove.duplicatefilters=true;
-set hive.tez.dynamic.partition.pruning=true;
-set hive.optimize.metadataonly=false;
-set hive.optimize.index.filter=true;
-set hive.tez.min.bloom.filter.entries=1;
-
-set hive.tez.dynamic.partition.pruning=true;
-set hive.stats.fetch.column.stats=true;
-set hive.cbo.enable=true;
-
-EXPLAIN EXTENDED
+SET hive.mapred.mode=nonstrict;
+SET hive.vectorized.execution.enabled=false;
+SET hive.optimize.topnkey=true;
+
+SET hive.optimize.ppd=true;
+SET hive.ppd.remove.duplicatefilters=true;
+SET hive.tez.dynamic.partition.pruning=true;
+SET hive.optimize.metadataonly=false;
+SET hive.optimize.index.filter=true;
+SET hive.tez.min.bloom.filter.entries=1;
+
+SET hive.stats.fetch.column.stats=true;
+SET hive.cbo.enable=true;
+
+SET hive.optimize.topnkey=true;
+EXPLAIN
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5;
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5;
+SET hive.optimize.topnkey=false;
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5;
+SET hive.optimize.topnkey=true;
+EXPLAIN
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+SET hive.optimize.topnkey=false;
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
+
+SET hive.optimize.topnkey=true;
 EXPLAIN
-SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5;
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5;
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5;
-SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5;
+SET hive.optimize.topnkey=false;
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5;
+
+CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+);
+
+INSERT INTO t_test VALUES
+(5, 2, 3),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2);
+
+SET hive.optimize.topnkey=true;
+EXPLAIN
+SELECT a, b FROM t_test ORDER BY a, b LIMIT 3;
+SELECT a, b FROM t_test ORDER BY a, b LIMIT 3;
+
+SET hive.optimize.topnkey=false;
+SELECT a, b FROM t_test ORDER BY a, b LIMIT 3;
+
+SET hive.optimize.topnkey=true;
+EXPLAIN
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3;
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3;
-explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5;
+SET hive.optimize.topnkey=false;
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3;
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5;
+DROP TABLE t_test;
diff --git ql/src/test/queries/clientpositive/vector_topnkey.q ql/src/test/queries/clientpositive/vector_topnkey.q
index 85c5880cd6..3412aba9db
100644 --- ql/src/test/queries/clientpositive/vector_topnkey.q +++ ql/src/test/queries/clientpositive/vector_topnkey.q @@ -43,4 +43,4 @@ SELECT cint1, cdouble FROM t_test GROUP BY cint1, cdouble ORDER BY cint1, cdoubl SELECT cvarchar, cdouble FROM t_test GROUP BY cvarchar, cdouble ORDER BY cvarchar, cdouble LIMIT 3; SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3; -DROP TABLE t_test; \ No newline at end of file +DROP TABLE t_test; diff --git ql/src/test/results/clientpositive/llap/bucket_groupby.q.out ql/src/test/results/clientpositive/llap/bucket_groupby.q.out index 0c051c926b..8d5ad0cedc 100644 --- ql/src/test/results/clientpositive/llap/bucket_groupby.q.out +++ ql/src/test/results/clientpositive/llap/bucket_groupby.q.out @@ -74,27 +74,21 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -217,27 +211,21 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution 
mode: llap LLAP IO: no inputs Reducer 2 @@ -334,27 +322,21 @@ STAGE PLANS: expressions: length(key) (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: _col0 (type: int) - null sort order: a - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -430,27 +412,21 @@ STAGE PLANS: expressions: abs(length(key)) (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: _col0 (type: int) - null sort order: a - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -527,27 +503,21 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -645,27 +615,21 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: value (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: value (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1281,28 +1245,22 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() + bucketGroup: true keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - bucketGroup: true - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1400,27 +1358,21 @@ STAGE 
PLANS: expressions: value (type: string) outputColumnNames: value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: value (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: value (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1625,28 +1577,22 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() + bucketGroup: true keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - bucketGroup: true - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1744,27 +1690,21 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ + Group By Operator + aggregations: count() keys: key (type: string), value (type: string) - null sort order: za - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: 
Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: za - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/check_constraint.q.out ql/src/test/results/clientpositive/llap/check_constraint.q.out index 9f2c9a1cd0..86e195cf9b 100644 --- ql/src/test/results/clientpositive/llap/check_constraint.q.out +++ ql/src/test/results/clientpositive/llap/check_constraint.q.out @@ -1756,27 +1756,21 @@ STAGE PLANS: expressions: key (type: string), value (type: string), UDFToInteger(key) (type: int), CAST( key AS decimal(5,2)) (type: decimal(5,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ + Group By Operator + aggregations: min(_col2), max(_col3) keys: _col0 (type: string), _col1 (type: string) - null sort order: zz - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: min(_col2), max(_col3) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: int), _col3 (type: decimal(5,2)) + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: int), _col3 (type: decimal(5,2)) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/constraints_optimization.q.out ql/src/test/results/clientpositive/llap/constraints_optimization.q.out index b6d210becf..572156f1c9 100644 --- ql/src/test/results/clientpositive/llap/constraints_optimization.q.out +++ ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -330,27 +330,21 @@ STAGE PLANS: expressions: d_datekey (type: bigint), d_id (type: bigint) outputColumnNames: d_datekey, d_id Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++ + Group By Operator + 
aggregations: count() keys: d_datekey (type: bigint), d_id (type: bigint) - null sort order: za + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Group By Operator - aggregations: count() - keys: d_datekey (type: bigint), d_id (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: bigint) - null sort order: za - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -598,25 +592,19 @@ STAGE PLANS: expressions: d_datekey (type: bigint), d_sellingseason (type: string) outputColumnNames: d_datekey, d_sellingseason Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ + Group By Operator keys: d_datekey (type: bigint), d_sellingseason (type: string) - null sort order: za - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - keys: d_datekey (type: bigint), d_sellingseason (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: string) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: string) - null sort order: za - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index 9343e078b7..6119f38847 100644 --- ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -3081,25 +3081,19 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ + Group By Operator keys: key (type: string), value (type: string) - null sort order: za - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 2 - Group By Operator - keys: key (type: string), value (type: 
string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: za - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 735296f814..d458db6829 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -1538,21 +1538,19 @@ Stage-0 PartitionCols:_col0, _col1,null sort order:zz,sort order:++ Group By Operator [GBY_7] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 - Top N Key Operator [TNK_15] (rows=10 width=101) - keys:_col1, _col0,null sort order:zz,sort order:++,top n:1 - Select Operator [SEL_5] (rows=10 width=101) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=10 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_3] - PartitionCols:_col0, _col1, _col2,null sort order:aaa,sort order:+++ - Group By Operator [GBY_2] (rows=10 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Select Operator [SEL_1] (rows=20 width=88) - Output:["key","c_int","c_float"] - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Select Operator [SEL_5] (rows=10 width=101) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=10 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_3] + PartitionCols:_col0, _col1, _col2,null sort order:aaa,sort order:+++ + Group By Operator [GBY_2] (rows=10 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Select Operator [SEL_1] (rows=20 width=88) + Output:["key","c_int","c_float"] + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select key from(select key from (select key from cbo_t1 limit 5)cbo_t2 limit 5)cbo_t3 limit 5 PREHOOK: type: QUERY @@ -1677,7 +1675,7 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col7 Select Operator [SEL_35] (rows=2 width=20) Output:["_col4","_col7"] - Merge Join Operator [MERGEJOIN_72] (rows=2 width=20) + Merge Join Operator [MERGEJOIN_71] (rows=2 width=20) Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col4","_col5","_col7","_col8"],residual filter predicates:{(_col5 or _col8)} <-Map 11 [SIMPLE_EDGE] llap SHUFFLE [RS_33] 
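The hunks in this region all change the same way: with hive.optimize.topnkey disabled, the planner no longer puts a Top N Key Operator between the Select/Filter and the map-side Group By, so those operator blocks drop out of the golden files and only the Reduce Output Operator's TopN Hash Memory Usage line remains. A minimal sketch of the kind of query that exercises this path (the table, aggregate, and LIMIT value are illustrative, not copied from any single test in this patch):

-- Hypothetical repro: toggle the optimizer flag and compare EXPLAIN output.
-- With the flag on, the plan is expected to show "Top N Key Operator ... top n: 10"
-- above the map-side Group By Operator; with it off, that operator disappears,
-- matching the shape of the removals in the surrounding hunks.
SET hive.optimize.topnkey=true;
EXPLAIN
SELECT key, count(*)
FROM src
GROUP BY key
ORDER BY key
LIMIT 10;

SET hive.optimize.topnkey=false;
EXPLAIN
SELECT key, count(*)
FROM src
GROUP BY key
ORDER BY key
LIMIT 10;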
@@ -1691,7 +1689,7 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] llap SHUFFLE [RS_32] PartitionCols:_col2,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_71] (rows=1 width=105) + Merge Join Operator [MERGEJOIN_70] (rows=1 width=105) Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col3 + _col1) >= 0)} <-Reducer 10 [SIMPLE_EDGE] llap SHUFFLE [RS_30]
@@ -1716,12 +1714,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2,null sort order:zaa,sort order:+++ Group By Operator [GBY_16] (rows=3 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Top N Key Operator [TNK_55] (rows=6 width=93) - keys:key, c_int, c_float,null sort order:zaa,sort order:+++,top n:5 - Filter Operator [FIL_53] (rows=6 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) - TableScan [TS_13] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Filter Operator [FIL_53] (rows=6 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) + TableScan [TS_13] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_29] PartitionCols:_col0,null sort order:a,sort order:+
diff --git ql/src/test/results/clientpositive/llap/explainuser_2.q.out ql/src/test/results/clientpositive/llap/explainuser_2.q.out index b90fb55fd1..d0e0e9df12 100644 --- ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_2.q.out
@@ -335,6 +335,7 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized, llap - File Output Operator [FS_217] - Limit [LIM_216] (rows=2 width=285) - Number of rows:100
@@ -344,98 +345,191 @@ Stage-0 - SHUFFLE [RS_214] - null sort order:zzz,sort order:+++ - Group By Operator [GBY_213] (rows=2 width=285) + File Output Operator [FS_220] + Limit [LIM_219] (rows=5 width=285) + Number of rows:100 + Select Operator [SEL_218] (rows=5 width=285) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 4 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_217] + Group By Operator [GBY_216] (rows=5 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2,null sort order:zzz,sort order:+++ Group By Operator [GBY_48] (rows=2 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col8)","count(_col15)","count(_col3)"],keys:_col7, _col14, _col2 - Top N Key Operator [TNK_91] (rows=28 width=534) - keys:_col7, _col14, _col2,null sort order:zzz,sort order:+++,top n:100 - Merge Join Operator [MERGEJOIN_188] (rows=28 width=534) + Top N Key Operator [TNK_92] (rows=4704 width=534) + keys:_col7, _col14, _col2,sort order:+++,top n:100 + Merge Join Operator [MERGEJOIN_189] (rows=4704 width=534) Conds:RS_44._col1, _col3=RS_45._col10, _col12(Inner),Output:["_col2","_col3","_col7","_col8","_col14","_col15"] <-Reducer 10 [SIMPLE_EDGE] llap SHUFFLE [RS_45] PartitionCols:_col10, _col12,null sort order:aa,sort order:++ Select Operator [SEL_40] (rows=2 width=447) Output:["_col2","_col3","_col9","_col10","_col12"] - Merge Join Operator [MERGEJOIN_187] (rows=2 width=447) + Merge Join Operator [MERGEJOIN_188] (rows=336 width=447) Conds:RS_37._col2, _col4=RS_38._col1, _col3(Inner),Output:["_col0","_col1","_col9","_col10","_col12"] <-Reducer 11 [SIMPLE_EDGE] llap SHUFFLE [RS_38] - PartitionCols:_col1, _col3,null sort order:aa,sort order:++ + PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_186] (rows=5 width=356) - Conds:RS_212._col0=RS_200._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_187] (rows=8 width=356) + Conds:RS_215._col0=RS_203._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_200] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_197] (rows=5 width=87) + SHUFFLE [RS_203] + PartitionCols:_col0 + Select Operator [SEL_200] (rows=5 width=87) Output:["_col0"] - Filter Operator [FIL_194] (rows=5 width=178) + Filter Operator [FIL_197] (rows=5 width=178) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) TableScan [TS_3] (rows=500 width=178) default@src,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 15 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_212] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_211] (rows=7 width=443) + SHUFFLE [RS_215] + PartitionCols:_col0 + Select Operator [SEL_214] (rows=7 width=443) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_210] (rows=7 width=534) + Filter Operator [FIL_213] (rows=7 width=534) predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) TableScan [TS_18] (rows=85 width=534) default@sr,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] <-Reducer 9 [SIMPLE_EDGE] llap SHUFFLE [RS_37] - PartitionCols:_col2, _col4,null sort order:aa,sort order:++ + PartitionCols:_col2, _col4 - Merge Join Operator [MERGEJOIN_185] (rows=2 width=352) - Conds:RS_34._col1=RS_209._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_186] (rows=42 width=352) + Conds:RS_34._col1=RS_212._col0(Inner),Output:["_col0","_col1","_col2","_col4"] <-Map 14 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_209] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_208] (rows=2 width=89) + SHUFFLE [RS_212] + PartitionCols:_col0 + Select Operator [SEL_211] (rows=2 width=89) Output:["_col0"] - Filter Operator [FIL_207] (rows=2 width=175) + Filter Operator [FIL_210] (rows=2 width=175) predicate:((key = 'src1key') and value is not null) TableScan [TS_15] (rows=25 width=175) default@src1,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Reducer 8 [SIMPLE_EDGE] llap SHUFFLE [RS_34] - PartitionCols:_col1,null sort order:a,sort order:+ + PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_184] (rows=2 width=352) - Conds:RS_31._col3=RS_206._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_185] (rows=42 width=352) + Conds:RS_31._col3=RS_209._col0(Inner),Output:["_col0","_col1","_col2","_col4"] <-Map 13 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_206] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_205] (rows=6 width=91) + SHUFFLE [RS_209] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=6 width=91) Output:["_col0"] - Filter Operator [FIL_204] (rows=6 width=178) + Filter Operator [FIL_207] (rows=6 width=178) predicate:((key = 'srcpartkey') and value is not null) TableScan [TS_12] (rows=2000 width=178) default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Reducer 7 [SIMPLE_EDGE] llap SHUFFLE [RS_31] - PartitionCols:_col3,null sort order:a,sort order:+ + PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_183] (rows=2 width=443) - Conds:RS_203._col0=RS_199._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_184] (rows=7 width=443) + Conds:RS_206._col0=RS_202._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_199] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_196] (rows=2 width=87) + SHUFFLE [RS_202] + PartitionCols:_col0 + Select Operator [SEL_199] (rows=2 width=87) Output:["_col0"] - Filter Operator [FIL_193] (rows=2 width=178) + Filter Operator [FIL_196] (rows=2 width=178) predicate:((value = 'd1value') and key is not null) Please refer to the previous TableScan [TS_3] <-Map 12 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_203] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_202] (rows=7 width=443) + SHUFFLE [RS_206] + PartitionCols:_col0 + Select Operator [SEL_205] (rows=7 width=443) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_201] (rows=7 width=534) + Filter Operator [FIL_204] (rows=7 width=534) predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) TableScan [TS_6] (rows=85 width=534) default@ss_n1,ss_n1,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_44] - PartitionCols:_col1, _col3,null sort order:aa,sort order:++ - Merge Join Operator [MERGEJOIN_182] (rows=70 width=269) - Conds:RS_191._col0=RS_198._col0(Inner),Output:["_col1","_col2","_col3"]
@@ -443,16 +537,31 @@ Stage-0 - SHUFFLE [RS_198] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_195] (rows=5 width=87) + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_183] (rows=70 width=269) + Conds:RS_194._col0=RS_201._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_201] + PartitionCols:_col0 + Select Operator [SEL_198] (rows=5 width=87) Output:["_col0"] - Filter Operator [FIL_192] (rows=5 width=178) + Filter Operator [FIL_195] (rows=5 width=178) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_191] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_190] (rows=170 width=356) + SHUFFLE [RS_194] + PartitionCols:_col0 + Select Operator [SEL_193] (rows=170 width=356) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_189] (rows=170 width=356) + Filter Operator [FIL_192] (rows=170 width=356) predicate:(v2 is not null and v3 is not null and k1 is not null) TableScan [TS_0] (rows=170 width=356) default@cs,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v2","k3","v3"]
@@ -1070,12 +1179,13 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized, llap - File Output Operator [FS_234] - Limit [LIM_233] (rows=100 width=10) + File Output Operator [FS_237] + Limit [LIM_236] (rows=100 width=10) Number of rows:100 - Select Operator [SEL_232] (rows=805 width=10) + Select Operator [SEL_235] (rows=805 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_231] - null sort order:zzz,sort order:+++ - Group By Operator [GBY_230] (rows=805 width=10)
@@ -1098,22 +1208,46 @@ Stage-0 - BROADCAST [RS_198] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_197] (rows=170 width=34) + SHUFFLE [RS_234] + Group By Operator [GBY_233] (rows=805 width=10) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_232] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_231] (rows=1610 width=10) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col9)","count(_col16)","count(_col3)"],keys:_col8, _col15, _col2 + Top N Key Operator [TNK_230] (rows=1610 width=10) + keys:_col8, _col15, _col2,sort order:+++,top n:100 + Map Join Operator [MAPJOIN_229] (rows=1610 width=10) + Conds:RS_205._col1, _col3=SEL_228._col11, _col13(Inner),Output:["_col2","_col3","_col8","_col9","_col15","_col16"] + <-Map 2 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_205] + PartitionCols:_col1, _col3 + Map Join Operator [MAPJOIN_204] (rows=550 width=10) + Conds:RS_201._col0=SEL_203._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_201] + PartitionCols:_col0 + Select Operator [SEL_200] (rows=170 width=34) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_196] (rows=170 width=34) + Filter Operator [FIL_199] (rows=170 width=34) predicate:(v2 is not null and v3 is not null and k1 is not null) TableScan [TS_0] (rows=170 width=34) default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] - <-Select Operator [SEL_200] (rows=500 width=10) + <-Select Operator [SEL_203] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_199] (rows=500 width=10) + Filter Operator [FIL_202] (rows=500 width=10) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) TableScan [TS_3] (rows=500 width=10) default@src,d3,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_225] (rows=1464 width=10) + <-Select Operator [SEL_228] (rows=1464 width=10) Output:["_col3","_col4","_col10","_col11","_col13"] - Map Join Operator [MAPJOIN_224] (rows=1464 width=10) - Conds:MAPJOIN_223._col3, _col5=RS_218._col2, _col4(Inner),Output:["_col1","_col2","_col10","_col11","_col13"] + Map Join Operator [MAPJOIN_227] (rows=1464 width=10) + Conds:MAPJOIN_226._col3, _col5=RS_221._col2, _col4(Inner),Output:["_col1","_col2","_col10","_col11","_col13"] <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_218] - PartitionCols:_col2, _col4,null sort order:aa,sort order:++ - Map Join Operator [MAPJOIN_217] (rows=550 width=10)
@@ -1122,53 +1256,81 @@ Stage-0 - BROADCAST [RS_214] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_213] (rows=42 width=34) + BROADCAST [RS_221] + PartitionCols:_col2, _col4 + Map Join Operator [MAPJOIN_220] (rows=550 width=10) + Conds:SEL_219._col0=RS_217._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 10 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_217] + PartitionCols:_col0 + Select Operator [SEL_216] (rows=42 width=34) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_212] (rows=42 width=34) + Filter Operator [FIL_215] (rows=42 width=34) predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) TableScan [TS_21] (rows=85 width=34) default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_216] (rows=500 width=10) + <-Select Operator [SEL_219] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_215] (rows=500 width=10) + Filter Operator [FIL_218] (rows=500 width=10) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) TableScan [TS_18] (rows=500 width=10) default@src,d2,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_223] (rows=1331 width=10) - Conds:MAPJOIN_222._col2=RS_211._col0(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Map Join Operator [MAPJOIN_226] (rows=1331 width=10) + Conds:MAPJOIN_225._col2=RS_214._col0(Inner),Output:["_col1","_col2","_col3","_col5"] <-Map 8 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_211] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_210] (rows=12 width=7) + BROADCAST [RS_214] + PartitionCols:_col0 + Select Operator [SEL_213] (rows=12 width=7) Output:["_col0"] - Filter Operator [FIL_209] (rows=12 width=7) + Filter Operator [FIL_212] (rows=12 width=7) predicate:((key = 'src1key') and value is not null) TableScan [TS_15] (rows=25 width=7) default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_222] (rows=1210 width=10) - Conds:MAPJOIN_221._col1=RS_208._col0(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Map Join Operator [MAPJOIN_225] (rows=1210 width=10) + Conds:MAPJOIN_224._col1=RS_211._col0(Inner),Output:["_col1","_col2","_col3","_col5"] <-Map 7 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_208] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_207] (rows=250 width=10) + BROADCAST [RS_211] + PartitionCols:_col0 + Select Operator [SEL_210] (rows=250 width=10) Output:["_col0"] - Filter Operator [FIL_206] (rows=250 width=10) + Filter Operator [FIL_209] (rows=250 width=10) predicate:((value = 'd1value') and key is not null) TableScan [TS_12] (rows=500 width=10) default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_221] (rows=1100 width=10) - Conds:SEL_220._col0=RS_205._col3(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Map Join Operator [MAPJOIN_224] (rows=1100 width=10) + Conds:SEL_223._col0=RS_208._col3(Inner),Output:["_col1","_col2","_col3","_col5"] <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_205] - PartitionCols:_col3,null sort order:a,sort order:+ - Select Operator [SEL_204] (rows=42 width=34) + BROADCAST [RS_208] + PartitionCols:_col3 + Select Operator [SEL_207] (rows=42 width=34) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_203] (rows=42 width=34) + Filter Operator [FIL_206] (rows=42 width=34) predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) TableScan [TS_9] (rows=85 width=34) default@ss_n1,ss_n1,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_220] (rows=1000 width=10) + <-Select Operator [SEL_223] (rows=1000 width=10) Output:["_col0"] - Filter Operator [FIL_219] (rows=1000 width=10) + Filter Operator [FIL_222] (rows=1000 width=10) predicate:((key = 'srcpartkey') and value is not null) TableScan [TS_6] (rows=2000 width=10) default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
diff --git ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out index 545cce75a9..d11c3d7ea9 100644 --- ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out +++ ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out
@@ -1920,27 +1920,21 @@ GROUP BY "t0"."cs_ship_customer_sk" expressions: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) outputColumnNames: _col32, _col33, _col34, _col35, _col36 Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++++ + Group By Operator + aggregations: count() keys: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) - null sort order: zzzzz + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - top n: 100 - Group By Operator - aggregations: count() - keys: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) + null sort order: zzzzz + sort order: +++++ + Map-reduce partition columns: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int),
_col4 (type: char(10)) - Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col5 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col5 (type: bigint) Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -2410,27 +2404,21 @@ GROUP BY "t0"."cs_ship_customer_sk" expressions: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) outputColumnNames: _col32, _col33, _col34, _col35, _col36 Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++++ + Group By Operator + aggregations: count() keys: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) - null sort order: zzzzz + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - top n: 100 - Group By Operator - aggregations: count() - keys: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) + null sort order: zzzzz + sort order: +++++ + Map-reduce partition columns: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) - null sort order: zzzzz - sort order: +++++ - Map-reduce partition columns: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) - Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col5 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col5 (type: bigint) Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/limit_pushdown.q.out ql/src/test/results/clientpositive/llap/limit_pushdown.q.out index 3fdd77d802..dfa9bdfd57 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown.q.out @@ -213,27 +213,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + 
null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -321,27 +315,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - aggregations: sum(_col1), count(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double), _col2 (type: bigint) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -433,25 +421,19 @@ STAGE PLANS: expressions: cdouble (type: double) outputColumnNames: cdouble Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator keys: cdouble (type: double) - null sort order: z - Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - keys: cdouble (type: double) - minReductionHashAggr: 0.55013025 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 + minReductionHashAggr: 0.55013025 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap 
LLAP IO: all inputs Reducer 2 @@ -461,13 +443,13 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1009,48 +991,29 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 2 - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: bigint) - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: a - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 3 - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out index efa8c38d7c..e11490856a 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out 
@@ -214,27 +214,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -337,27 +331,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - aggregations: sum(_col1), count(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double), _col2 (type: bigint) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -464,25 +452,19 @@ STAGE PLANS: expressions: cdouble (type: double) outputColumnNames: cdouble Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator keys: cdouble (type: double) - null sort order: z - Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - keys: cdouble (type: double) - minReductionHashAggr: 0.55013025 - mode: hash - outputColumnNames: 
_col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 + minReductionHashAggr: 0.55013025 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -492,12 +474,12 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized, llap @@ -505,13 +487,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/offset_limit.q.out ql/src/test/results/clientpositive/llap/offset_limit.q.out index 23f2de46e5..36e9df4763 100644 --- ql/src/test/results/clientpositive/llap/offset_limit.q.out +++ ql/src/test/results/clientpositive/llap/offset_limit.q.out @@ -30,27 +30,21 @@ STAGE PLANS: expressions: key (type: string), substr(value, 5) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out index 4ecb7bc46d..7ed66b7681 100644 --- ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out +++ ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out @@ -215,27 +215,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 30 - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -324,27 +318,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 30 - Group By Operator - aggregations: sum(_col1), count(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - 
Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double), _col2 (type: bigint) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -437,25 +425,19 @@ STAGE PLANS: expressions: cdouble (type: double) outputColumnNames: cdouble Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator keys: cdouble (type: double) - null sort order: z - Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - top n: 30 - Group By Operator - keys: cdouble (type: double) - minReductionHashAggr: 0.55013025 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 + minReductionHashAggr: 0.55013025 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -465,14 +447,14 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out index 0eac389eb7..0818e6a68e 100644 --- ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out @@ -66,20 +66,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select st1, st1.f1, 
st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -146,11 +146,11 @@ STAGE PLANS: PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -164,11 +164,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -182,11 +182,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -200,11 +200,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: explain vectorization expression select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -246,43 +246,33 @@ STAGE PLANS: projectedOutputColumnNums: [5] selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 5:int Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 5:int + native: 
false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: int) - null sort order: a + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 5:int - native: true - Group By Operator - aggregations: sum(_col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 5:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -351,11 +341,11 @@ STAGE PLANS: PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -382,20 +372,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from 
orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -409,11 +399,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -427,11 +417,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -445,11 +435,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -476,20 +466,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -503,11 +493,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, 
st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -521,11 +511,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -539,11 +529,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 diff --git ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out index 4362fb6f2e..30a5022fdb 100644 --- ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out @@ -78,20 +78,20 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -158,11 +158,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -176,11 
+176,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -222,43 +222,33 @@ STAGE PLANS: projectedOutputColumnNums: [7, 8] selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 8:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 7:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: int) - null sort order: z + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 7:int - native: true - Group By Operator - aggregations: sum(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 8:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 7:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: @@ -355,11 +345,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from 
parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2144 2145 2142 2143 2140 2141 @@ -388,20 +378,20 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -468,11 +458,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -486,11 +476,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -532,43 +522,33 @@ STAGE PLANS: projectedOutputColumnNums: [7, 8] selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 
8:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 7:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: int) - null sort order: z + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 7:int - native: true - Group By Operator - aggregations: sum(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 8:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 7:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: @@ -665,11 +645,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2146 2147 2144 2145 2142 2143 @@ -698,20 +678,20 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 
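
Note: throughout these regenerated golden files, every "- Top N Key Operator" block (with its VectorTopNKeyOperator vectorization detail) drops out of the map-side plan, leaving Select feeding Group By directly — the plans were produced with the top-n-key optimizer switched off. The EXPLAIN just quoted is a handy way to see the flag's effect interactively. The sketch below is illustrative only, not part of the patch: it assumes a HiveServer2 endpoint (the JDBC URL is a placeholder) and the Hive JDBC driver on the classpath; the query itself is taken verbatim from the golden file.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class TopNKeyExplain {
  public static void main(String[] args) throws Exception {
    // Placeholder URL; any HiveServer2 endpoint works.
    try (Connection conn =
             DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement stmt = conn.createStatement()) {
      // Turn the optimizer back on for this session only.
      stmt.execute("set hive.optimize.topnkey=true");
      // Query taken verbatim from the golden file above.
      String sql = "explain vectorization expression "
          + "select sum(l1[0]), l1[1] from parquet_complex_types "
          + "where l1[0] > 1000 group by l1[1] order by l1[1] limit 10";
      try (ResultSet rs = stmt.executeQuery(sql)) {
        while (rs.next()) {
          // With the flag on, expect a "Top N Key Operator" (top n: 10)
          // between the Select Operator and the Group By Operator.
          System.out.println(rs.getString(1));
        }
      }
    }
  }
}

With the flag at its default (off), the same EXPLAIN prints exactly the "+" side of the hunks in this file.
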
PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -778,11 +758,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -796,11 +776,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -842,43 +822,33 @@ STAGE PLANS: projectedOutputColumnNums: [7, 8] selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 8:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 7:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: int) - null sort order: z + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 7:int - native: true - Group By Operator - aggregations: sum(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 8:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 7:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: @@ -975,11 +945,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2148 2149 2146 2147 2144 2145 diff --git ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out index 24468c9a1b..c6967d77be 100644 --- ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ -88,22 +88,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -171,12 +171,12 @@ PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456 stringMap[stringIndex], intMap[intIndex], 
doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -191,12 +191,12 @@ PREHOOK: query: explain vectorization expression select sum(intMap[123]), sum(do from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -238,43 +238,33 @@ STAGE PLANS: projectedOutputColumnNums: [9, 10, 11] selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 9:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 10:int, VectorUDFMapIndexDecimalScalar(col 3:map, key: 123.123) -> 11:double Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1), sum(_col2) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 10:int) -> bigint, VectorUDAFSumDouble(col 11:double) -> double + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 9:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] keys: _col0 (type: string) - null sort order: z + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 9:string - native: true - Group By Operator - aggregations: sum(_col1), sum(_col2) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 10:int) -> bigint, VectorUDAFSumDouble(col 11:double) -> double - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 9:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint), _col2 (type: double) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: @@ -372,12 +362,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -408,22 +398,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -438,12 +428,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: 
default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -474,22 +464,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -504,12 +494,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 diff --git ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out index 45890a1890..87399e88cb 100644 --- ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out @@ -66,20 +66,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A 
masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -146,11 +146,11 @@ STAGE PLANS: PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -164,11 +164,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -182,11 +182,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -200,11 +200,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: explain vectorization expression select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain vectorization expression select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -246,43 +246,33 @@ STAGE PLANS: projectedOutputColumnNums: [5] selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 5:int Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: 
sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 5:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: int) - null sort order: a + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 5:int - native: true - Group By Operator - aggregations: sum(_col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 5:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: @@ -351,11 +341,11 @@ STAGE PLANS: PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -382,20 +372,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from 
parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -409,11 +399,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -427,11 +417,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -445,11 +435,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -476,20 +466,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 
str2003 @@ -503,11 +493,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -521,11 +511,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -539,11 +529,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 diff --git ql/src/test/results/clientpositive/llap/topnkey.q.out ql/src/test/results/clientpositive/llap/topnkey.q.out index 1e77587f82..ed54cd87b9 100644 --- ql/src/test/results/clientpositive/llap/topnkey.q.out +++ ql/src/test/results/clientpositive/llap/topnkey.q.out @@ -1,18 +1,13 @@ -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: EXPLAIN SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: EXPLAIN SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -OPTIMIZED SQL: SELECT `key` AS `$f0`, SUM(CAST(SUBSTR(`value`, 5) AS INTEGER)) AS `$f1` -FROM `default`.`src` -GROUP BY `key` -ORDER BY `key` -LIMIT 5 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -31,17 +26,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - null sort order: z + Top N Key Operator + sort order: + + keys: key (type: string) + null sort order: z + top n: 5 + Select Operator + expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 Group By Operator aggregations: 
sum(_col1) keys: _col0 (type: string) @@ -55,69 +48,12 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) - auto parallelism: true Execution mode: llap LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -130,14 +66,10 @@ STAGE PLANS: null sort order: z sort order: + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) - auto parallelism: false Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) @@ -148,26 +80,11 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - 
GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -175,6 +92,19 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -189,12 +119,12 @@ POSTHOOK: Input: default@src 103 206 104 208 PREHOOK: query: EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -207,67 +137,112 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src + alias: src1 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a sort order: + - keys: key (type: string) - null sort order: z + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Group By Operator - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce 
partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+                Top N Key Operator
+                  sort order: +
+                  keys: _col0 (type: string)
+                  null sort order: z
+                  top n: 5
+                  Top N Key Operator
+                    sort order: ++
+                    keys: _col0 (type: string), _col2 (type: string)
+                    null sort order: za
+                    top n: 5
+                    Group By Operator
+                      keys: _col0 (type: string), _col2 (type: string)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        null sort order: za
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+                        TopN Hash Memory Usage: 0.1
+        Reducer 3
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                keys: KEY._col0 (type: string)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   null sort order: z
                   sort order: +
-                  Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
                   TopN Hash Memory Usage: 0.1
-        Reducer 3
+                  value expressions: _col1 (type: string)
+        Reducer 4
             Execution mode: llap
             Reduce Operator Tree:
               Select Operator
-                expressions: KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 5
-                  Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -279,33 +254,42 @@ STAGE PLANS:
       Processor Tree:
         ListSink

-PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
#### A masked pattern was here ####
-POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
#### A masked pattern was here ####
-0
-10
-100
-103
-104
-PREHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
#### A masked pattern was here ####
-POSTHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: EXPLAIN
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
#### A masked pattern was here ####
-PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -315,19 +299,21 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: src1
-                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Top N Key Operator
+                    sort order: +
+                    keys: key (type: string)
+                    null sort order: a
+                    top n: 5
                     Select Operator
                       expressions: key (type: string)
                       outputColumnNames: _col0
@@ -340,7 +326,7 @@ STAGE PLANS:
                         Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
-        Map 4
+        Map 5
             Map Operator Tree:
                 TableScan
                   alias: src2
@@ -367,30 +353,52 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Inner Join 0 to 1
+                     Left Outer Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col2
                 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: string), _col2 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    null sort order: z
-                    sort order: +
-                    Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
-                    TopN Hash Memory Usage: 0.1
-                    value expressions: _col1 (type: string)
+                Top N Key Operator
+                  sort order: ++
+                  keys: _col0 (type: string), _col2 (type: string)
+                  null sort order: aa
+                  top n: 5
+                  Group By Operator
+                    keys: _col0 (type: string), _col2 (type: string)
+                    minReductionHashAggr: 0.0
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: string)
+                      null sort order: aa
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                      Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+                      TopN Hash Memory Usage: 0.1
         Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: a
+                  sort order: +
+                  Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+                  value expressions: _col1 (type: string)
+        Reducer 4
             Execution mode: llap
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 5
                  Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
@@ -408,16 +416,275 @@ STAGE PLANS:
       Processor Tree:
        ListSink

-PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
#### A masked pattern was here ####
-POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
#### A masked pattern was here ####
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
 0 val_0
-0 val_0
-0 val_0
-0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(5, 2, 3),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(5, 2, 3),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: EXPLAIN
+SELECT a, b FROM t_test ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT a, b FROM t_test ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t_test
+                  Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                  Top N Key Operator
+                    sort order: ++
+                    keys: a (type: int), b (type: int)
+                    null sort order: zz
+                    Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                    top n: 3
+                    Select Operator
+                      expressions: a (type: int), b (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        null sort order: zz
+                        sort order: ++
+                        Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                        TopN Hash Memory Usage: 0.1
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 3
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 1
+5 1
+5 1
+PREHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 1
+5 1
+5 1
+PREHOOK: query: EXPLAIN
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t_test
+                  Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                  Top N Key Operator
+                    sort order: ++
+                    keys: a (type: int), b (type: int)
+                    null sort order: zz
+                    top n: 3
+                    Select Operator
+                      expressions: a (type: int), b (type: int)
+                      outputColumnNames: a, b
+                      Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: a (type: int), b (type: int)
+                        minReductionHashAggr: 0.375
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int), _col1 (type: int)
+                          null sort order: zz
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                          Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                          TopN Hash Memory Usage: 0.1
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  null sort order: zz
+                  sort order: ++
+                  Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 3
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 1
+5 2
+6 2
+PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 1
+5 2
+6 2
+PREHOOK: query: DROP TABLE t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
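
Reviewer note (not part of the generated output): the t_test golden results above correspond to a short, self-contained script. The sketch below is an assumption for illustration; the SET line is hypothetical (the optimization has to be enabled explicitly in a session), while the DDL and queries are taken verbatim from the output.

-- Hedged sketch: reproducing the t_test case by hand.
SET hive.optimize.topnkey=true;          -- assumed; not part of this diff
CREATE TABLE t_test (a int, b int, c int);
INSERT INTO t_test VALUES
  (5, 2, 3),
  (6, 2, 1),
  (7, 8, 4), (7, 8, 4), (7, 8, 4),
  (5, 1, 2), (5, 1, 2), (5, 1, 2);
EXPLAIN SELECT a, b FROM t_test ORDER BY a, b LIMIT 3;           -- plan shows a Top N Key Operator with top n: 3
SELECT a, b FROM t_test ORDER BY a, b LIMIT 3;                   -- expected: 5 1 / 5 1 / 5 1
EXPLAIN SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3;
SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3;     -- expected: 5 1 / 5 2 / 6 2
DROP TABLE t_test;
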
diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
index cc2dc47280..d2347fa8aa 100644
--- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
+++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
@@ -145,43 +145,33 @@ STAGE PLANS:
                        native: true
                        projectedOutputColumnNums: [2]
                    Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: _col0 (type: int)
-                      null sort order: z
-                      Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 10
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 2:int
-                          native: true
-                      Group By Operator
-                        aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50)
-                        Group By Vectorization:
-                            aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 2:int
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
-                        keys: _col0 (type: int)
-                        minReductionHashAggr: 0.75500476
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
+                      keys: _col0 (type: int)
+                      minReductionHashAggr: 0.75500476
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                      Statistics: Num rows: 257 Data size: 40092 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 257 Data size: 40092 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          null sort order: z
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkLongOperator
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 257 Data size: 40092 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
-                          value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint)
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out
index f7e76e5a8b..4cd3f4bdd4 100644
--- ql/src/test/results/clientpositive/llap/vector_char_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out
@@ -106,43 +106,33 @@ STAGE PLANS:
                        projectedOutputColumnNums: [1, 3]
                        selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
                    Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: _col0 (type: char(20))
-                      null sort order: z
-                      Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 5
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      aggregations: sum(_col1), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 1:char(20)
-                          native: true
-                      Group By Operator
-                        aggregations: sum(_col1), count()
-                        Group By Vectorization:
-                            aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 1:char(20)
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: [0, 1]
-                        keys: _col0 (type: char(20))
-                        minReductionHashAggr: 0.49900198
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0, 1]
+                      keys: _col0 (type: char(20))
+                      minReductionHashAggr: 0.500998
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(20))
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: char(20))
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: char(20))
-                          null sort order: z
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: char(20))
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkStringOperator
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
-                          value expressions: _col1 (type: bigint), _col2 (type: bigint)
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col1 (type: bigint), _col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -319,43 +309,33 @@ STAGE PLANS:
                        projectedOutputColumnNums: [1, 3]
                        selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
                    Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: -
-                      keys: _col0 (type: char(20))
-                      null sort order: z
-                      Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 5
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      aggregations: sum(_col1), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 1:char(20)
-                          native: true
-                      Group By Operator
-                        aggregations: sum(_col1), count()
-                        Group By Vectorization:
-                            aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 1:char(20)
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: [0, 1]
-                        keys: _col0 (type: char(20))
-                        minReductionHashAggr: 0.49900198
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0, 1]
+                      keys: _col0 (type: char(20))
+                      minReductionHashAggr: 0.500998
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(20))
+                        null sort order: z
+                        sort order: -
+                        Map-reduce partition columns: _col0 (type: char(20))
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: char(20))
-                          null sort order: z
-                          sort order: -
-                          Map-reduce partition columns: _col0 (type: char(20))
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkStringOperator
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
-                          value expressions: _col1 (type: bigint), _col2 (type: bigint)
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col1 (type: bigint), _col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
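
Reviewer note (assumption, not generated output): in the two vectorized golden files above, the plan now goes straight into the hash Group By with no Top N Key Operator in front of it. A quick way to check which shape a session produces is sketched below; the table and column names are hypothetical, and both SET lines are illustrative only.

-- Hedged sketch: inspect whether a Top N Key Operator appears before the
-- hash Group By in a vectorized map plan.
SET hive.vectorized.execution.enabled=true;  -- illustrative
SET hive.optimize.topnkey=true;              -- flip to false to match the updated .q.out files
EXPLAIN VECTORIZATION DETAIL
SELECT some_key, sum(some_val), count(*)     -- hypothetical table/columns
FROM some_table
GROUP BY some_key
ORDER BY some_key
LIMIT 5;
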
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
index 6fd15e7101..72fe7fcd2a 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
@@ -72,45 +72,35 @@ STAGE PLANS:
                        native: true
                        projectedOutputColumnNums: [0, 1]
                    Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +++
-                      keys: a (type: string), b (type: string), 0L (type: bigint)
-                      null sort order: zza
-                      Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 10
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
-                          native: true
-                      Group By Operator
-                        aggregations: count()
-                        Group By Vectorization:
-                            aggregators: VectorUDAFCountStar(*) -> bigint
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: [0]
-                        keys: a (type: string), b (type: string), 0L (type: bigint)
-                        minReductionHashAggr: 0.0
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2, _col3
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        null sort order: zza
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumns: 0:string, 1:string, 2:bigint
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 3:bigint
                        Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
-                          null sort order: zza
-                          sort order: +++
-                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkMultiKeyOperator
-                              keyColumns: 0:string, 1:string, 2:bigint
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumns: 3:bigint
-                          Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
-                          value expressions: _col3 (type: bigint)
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col3 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -127,7 +117,7 @@ STAGE PLANS:
                    includeColumns: [0, 1]
                    dataColumns: a:string, b:string, c:string
                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [bigint, bigint]
+                    scratchColumnTypeNames: [bigint]
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
@@ -290,45 +280,35 @@ STAGE PLANS:
                        native: true
                        projectedOutputColumnNums: [0, 1]
                    Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +++
-                      keys: a (type: string), b (type: string), 0L (type: bigint)
-                      null sort order: zza
-                      Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 10
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
-                          native: true
-                      Group By Operator
-                        aggregations: count()
-                        Group By Vectorization:
-                            aggregators: VectorUDAFCountStar(*) -> bigint
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: [0]
-                        keys: a (type: string), b (type: string), 0L (type: bigint)
-                        minReductionHashAggr: 0.0
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2, _col3
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        null sort order: zza
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumns: 0:string, 1:string, 2:bigint
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 3:bigint
                        Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
-                          null sort order: zza
-                          sort order: +++
-                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkMultiKeyOperator
-                              keyColumns: 0:string, 1:string, 2:bigint
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumns: 3:bigint
-                          Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
-                          value expressions: _col3 (type: bigint)
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col3 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -345,7 +325,7 @@ STAGE PLANS:
                    includeColumns: [0, 1]
                    dataColumns: a:string, b:string, c:string
                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [bigint, bigint]
+                    scratchColumnTypeNames: [bigint]
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
@@ -508,45 +488,35 @@ STAGE PLANS:
                        native: true
                        projectedOutputColumnNums: [0, 1]
                    Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +++
-                      keys: a (type: string), b (type: string), 0L (type: bigint)
-                      null sort order: zza
-                      Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 10
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
-                          native: true
-                      Group By Operator
-                        aggregations: count()
-                        Group By Vectorization:
-                            aggregators: VectorUDAFCountStar(*) -> bigint
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: [0]
-                        keys: a (type: string), b (type: string), 0L (type: bigint)
-                        minReductionHashAggr: 0.0
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2, _col3
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 6 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        null sort order: zza
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumns: 0:string, 1:string, 2:bigint
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 3:bigint
                        Statistics: Num rows: 6 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
-                          null sort order: zza
-                          sort order: +++
-                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkMultiKeyOperator
-                              keyColumns: 0:string, 1:string, 2:bigint
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumns: 3:bigint
-                          Statistics: Num rows: 6 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
-                          value expressions: _col3 (type: bigint)
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col3 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -563,7 +533,7 @@ STAGE PLANS:
                    includeColumns: [0, 1]
                    dataColumns: a:string, b:string, c:string
                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [bigint, bigint]
+                    scratchColumnTypeNames: [bigint]
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
@@ -726,41 +696,31 @@ STAGE PLANS:
                        native: true
                        projectedOutputColumnNums: [0, 1, 2]
                    Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: ++++
-                      keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint)
-                      null sort order: zaaa
-                      Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 10
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint
-                          native: true
-                      Group By Operator
-                        Group By Vectorization:
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 5:bigint
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
-                        keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint)
-                        minReductionHashAggr: 0.0
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2, _col3
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 9 Data size: 2367 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
+                        null sort order: zaaa
+                        sort order: ++++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumns: 0:string, 1:string, 2:string, 3:bigint
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 9 Data size: 2367 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
-                          null sort order: zaaa
-                          sort order: ++++
-                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkMultiKeyOperator
-                              keyColumns: 0:string, 1:string, 2:string, 3:bigint
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 9 Data size: 2367 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
+                        TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -777,7 +737,7 @@ STAGE PLANS:
                    includeColumns: [0, 1, 2]
                    dataColumns: a:string, b:string, c:string
                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [bigint, bigint]
+                    scratchColumnTypeNames: [bigint]
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
@@ -936,41 +896,31 @@ STAGE PLANS:
                        native: true
                        projectedOutputColumnNums: [0]
                    Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: a (type: string)
-                      null sort order: z
-                      Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 10
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 0:string
-                          native: true
-                      Group By Operator
-                        Group By Vectorization:
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 0:string
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
-                        keys: a (type: string)
-                        minReductionHashAggr: 0.5
-                        mode: hash
-                        outputColumnNames: _col0
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: a (type: string)
+                      minReductionHashAggr: 0.5
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            keyColumns: 0:string
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          null sort order: z
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: string)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkStringOperator
-                              keyColumns: 0:string
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
+                        TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1133,45 +1083,35 @@ STAGE PLANS:
                        projectedOutputColumnNums: [6]
                        selectExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 1:string) -> 5:double) -> 6:double
                    Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: _col0 (type: double)
-                      null sort order: z
-                      Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 10
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 6:double
-                          native: true
-                      Group By Operator
-                        aggregations: count()
-                        Group By Vectorization:
-                            aggregators: VectorUDAFCountStar(*) -> bigint
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 6:double
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: [0]
-                        keys: _col0 (type: double)
-                        minReductionHashAggr: 0.5
-                        mode: hash
-                        outputColumnNames: _col0, _col1
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
+                      keys: _col0 (type: double)
+                      minReductionHashAggr: 0.5
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: double)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: double)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumns: 0:double
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 1:bigint
                        Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: double)
-                          null sort order: z
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: double)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkMultiKeyOperator
-                              keyColumns: 0:double
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumns: 1:bigint
-                          Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
-                          value expressions: _col1 (type: bigint)
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
index d6325982e3..529c929013 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
@@ -270,40 +270,30 @@ STAGE PLANS:
                        native: true
                        projectedOutputColumnNums: [9]
                    Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: ss_ticket_number (type: int)
-                      null sort order: z
-                      Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 20
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 9:int
-                          native: true
-                      Group By Operator
-                        Group By Vectorization:
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 9:int
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
-                        keys: ss_ticket_number (type: int)
-                        minReductionHashAggr: 0.915
-                        mode: hash
-                        outputColumnNames: _col0
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: ss_ticket_number (type: int)
+                      minReductionHashAggr: 0.915
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          null sort order: z
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkLongOperator
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
+                        TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
index a8019be7aa..973ddaf43b 100644
--- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
+++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
@@ -360,40 +360,30 @@ STAGE PLANS:
                        projectedOutputColumnNums: [25]
                        selectExpressions: StringGroupConcatColCol(col 22:string, col 24:string)(children: StringGroupColConcatStringScalar(col 21:string, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 20:string)(children: CastLongToString(col 19:int)(children: CastDoubleToLong(col 18:double)(children: DoubleColAddDoubleScalar(col 17:double, val 1.0)(children: DoubleColDivideDoubleScalar(col 16:double, val 3.0)(children: CastLongToDouble(col 15:int)(children: LongColSubtractLongScalar(col 14:int, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 14:int) -> 15:int) -> 16:double) -> 17:double) -> 18:double) -> 19:int) -> 20:string) -> 21:string) -> 22:string, CastLongToString(col 23:int)(children: VectorUDFYearDate(col 12, field YEAR) -> 23:int) -> 24:string) -> 25:string
                    Statistics: Num rows: 2000 Data size: 106288 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: _col0 (type: string)
-                      null sort order: z
-                      Statistics: Num rows: 2000 Data size: 106288 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 50
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 25:string
-                          native: true
-                      Group By Operator
-                        Group By Vectorization:
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 25:string
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
-                        keys: _col0 (type: string)
-                        minReductionHashAggr: 0.99
-                        mode: hash
-                        outputColumnNames: _col0
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: _col0 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string)
-                          null sort order: z
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: string)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkStringOperator
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.1
+                        TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_topnkey.q.out ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
index c140bdfd37..5b52f32b3e 100644
--- ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
+++ ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
@@ -87,24 +87,23 @@ STAGE PLANS:
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:cint1:int, 1:cint2:int, 2:cdouble:double, 3:cvarchar:varchar(50), 4:cdecimal1:decimal(10,2)/DECIMAL_64, 5:cdecimal2:decimal(38,5), 6:ROW__ID:struct]
-                  Select Operator
-                    expressions: cint1 (type: int)
-                    outputColumnNames: cint1
-                    Select Vectorization:
-                        className: VectorSelectOperator
+                  Top N Key Operator
+                    sort order: +
+                    keys: cint1 (type: int)
+                    null sort order: z
+                    top n: 3
+                    Top N Key Vectorization:
+                        className: VectorTopNKeyOperator
+                        keyExpressions: col 0:int
                        native: true
-                    projectedOutputColumnNums: [0]
-                    Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: cint1 (type: int)
-                      null sort order: z
-                      Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 3
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
-                          keyExpressions: col 0:int
+                    Select Operator
+                      expressions: cint1 (type: int)
+                      outputColumnNames: cint1
+                      Select Vectorization:
+                          className: VectorSelectOperator
                          native: true
+                      projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
@@ -117,7 +116,7 @@ STAGE PLANS:
                        minReductionHashAggr: 0.64285713
                        mode: hash
                        outputColumnNames: _col0
-                        Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          null sort order: z
@@ -128,7 +127,7 @@ STAGE PLANS:
                              keyColumns: 0:int
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                          TopN Hash Memory Usage: 0.1
            Execution mode: vectorized, llap
            LLAP IO: no inputs
@@ -174,7 +173,7 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  null sort order: z
@@ -184,7 +183,7 @@ STAGE PLANS:
                      keyColumns: 0:int
                      native: true
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                  TopN Hash Memory Usage: 0.1
        Reducer 3
            Execution mode: vectorized, llap
@@ -209,7 +208,7 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 5 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 3
                  Limit Vectorization:
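
Reviewer note (assumption, not generated output): vector_topnkey.q.out above is the one vectorized file in this patch where the Top N Key operator is kept, now evaluated directly on the scanned column (col 0:int) before the Select. A query of this shape exercises that path; the table name below is hypothetical and the SET line is illustrative.

-- Hedged sketch: Top N Key ahead of Select in a vectorized map plan.
SET hive.optimize.topnkey=true;              -- assumed; not part of this diff
EXPLAIN VECTORIZATION OPERATOR
SELECT cint1 FROM some_table                 -- hypothetical table with an int column cint1
GROUP BY cint1 ORDER BY cint1 LIMIT 3;
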
diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
index 7326adf522..680c1134ac 100644
--- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
@@ -510,42 +510,32 @@ STAGE PLANS:
                        native: true
                        projectedOutputColumnNums: [0]
                    Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
-                    Top N Key Operator
-                      sort order: +
-                      keys: ctinyint (type: tinyint)
-                      null sort order: z
-                      Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
-                      top n: 20
-                      Top N Key Vectorization:
-                          className: VectorTopNKeyOperator
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
                          keyExpressions: col 0:tinyint
-                          native: true
-                      Group By Operator
-                        Group By Vectorization:
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            keyExpressions: col 0:tinyint
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
-                        keys: ctinyint (type: tinyint)
-                        minReductionHashAggr: 0.9893392
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: tinyint)
-                          null sort order: z
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: tinyint)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
-                              keyColumns: 0:tinyint
-                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              partitionColumns: 0:tinyint
-                          Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
-                          TopN Hash Memory Usage: 0.3
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: ctinyint (type: tinyint)
+                      minReductionHashAggr: 0.9893392
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: tinyint)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: tinyint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumns: 0:tinyint
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            partitionColumns: 0:tinyint
+                        Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                        TopN Hash Memory Usage: 0.3
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -590,19 +580,19 @@ STAGE PLANS:
                keys: KEY._col0 (type: tinyint)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 20
                  Limit Vectorization:
                      className: VectorLimitOperator
                      native: true
-                  Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
                    File Sink Vectorization:
                        className: VectorFileSinkOperator
                        native: false
-                    Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/perf/tez/query10.q.out ql/src/test/results/clientpositive/perf/tez/query10.q.out
index 30628f4829..606d3c4b3a 100644
--- ql/src/test/results/clientpositive/perf/tez/query10.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query10.q.out
@@ -157,190 +157,188 @@ Stage-0
    limit:100
    Stage-1
      Reducer 8 vectorized
-      File Output Operator [FS_231]
-        Limit [LIM_230] (rows=1 width=419)
+      File Output Operator [FS_230]
+        Limit [LIM_229] (rows=1 width=419)
          Number of rows:100
-          Select Operator [SEL_229] (rows=1 width=419)
+          Select Operator [SEL_228] (rows=1 width=419)
            Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
          <-Reducer 7 [SIMPLE_EDGE] vectorized
-            SHUFFLE [RS_228]
+            SHUFFLE [RS_227]
              null sort order:zzzzzzzz,sort order:++++++++
-              Select Operator [SEL_227] (rows=1 width=419)
+              Select Operator [SEL_226] (rows=1 width=419)
                Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"]
-                Group By Operator [GBY_226] (rows=1 width=379)
+                Group By Operator [GBY_225] (rows=1 width=379)
                  Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7
                <-Reducer 6 [SIMPLE_EDGE]
                  SHUFFLE [RS_67]
                    PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,null sort order:zzzzzzzz,sort order:++++++++
                    Group By Operator [GBY_66] (rows=3 width=379)
                      Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-                      Top N Key Operator [TNK_103] (rows=1401496 width=379)
-                        keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,null sort order:zzzzzzzz,sort order:++++++++,top n:100
-                        Select Operator [SEL_65] (rows=1401496 width=379)
-                          Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
-                          Filter Operator [FIL_64] (rows=1401496 width=379)
-                            predicate:(_col14 is not null or _col16 is not null)
-                            Merge Join Operator [MERGEJOIN_181] (rows=1401496 width=379)
-                              Conds:RS_61._col0=RS_225._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col16"]
-                            <-Reducer 19 [SIMPLE_EDGE] vectorized
-                              SHUFFLE [RS_225]
-                                PartitionCols:_col1,null sort order:a,sort order:+
-                                Select Operator [SEL_224] (rows=1401496 width=7)
-                                  Output:["_col0","_col1"]
-                                  Group By Operator [GBY_223] (rows=1401496 width=3)
-                                    Output:["_col0"],keys:KEY._col0
-                                  <-Reducer 18 [SIMPLE_EDGE]
-                                    SHUFFLE [RS_44]
-                                      PartitionCols:_col0,null sort order:a,sort order:+
-                                      Group By Operator [GBY_43] (rows=285115246 width=3)
-                                        Output:["_col0"],keys:_col1
-                                        Merge Join Operator [MERGEJOIN_178] (rows=285115246 width=3)
-                                          Conds:RS_222._col0=RS_197._col0(Inner),Output:["_col1"]
-                                        <-Map 13 [SIMPLE_EDGE] vectorized
-                                          PARTITION_ONLY_SHUFFLE [RS_197]
-                                            PartitionCols:_col0,null sort order:a,sort order:+
-                                            Select Operator [SEL_192] (rows=201 width=4)
-                                              Output:["_col0"]
-                                              Filter Operator [FIL_191] (rows=201 width=12)
-                                                predicate:((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null)
-                                                TableScan [TS_12] (rows=73049 width=12)
-                                                  default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"]
-                                        <-Map 22 [SIMPLE_EDGE] vectorized
-                                          SHUFFLE [RS_222]
-                                            PartitionCols:_col0,null sort order:a,sort order:+
-                                            Select Operator [SEL_221] (rows=285115246 width=7)
-                                              Output:["_col0","_col1"]
-                                              Filter Operator [FIL_220] (rows=285115246 width=7)
-                                                predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter)))
-                                                TableScan [TS_33] (rows=287989836 width=7)
-                                                  default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
-                                              <-Reducer 20 [BROADCAST_EDGE] vectorized
-                                                BROADCAST [RS_219]
-                                                  Group By Operator [GBY_218] (rows=1 width=12)
-                                                    Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
-                                                  <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                    PARTITION_ONLY_SHUFFLE [RS_204]
-                                                      Group By Operator [GBY_201] (rows=1 width=12)
-                                                        Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                                        Select Operator [SEL_198] (rows=201 width=4)
-                                                          Output:["_col0"]
-                                                          Please refer to the previous Select Operator [SEL_192]
-                            <-Reducer 5 [SIMPLE_EDGE]
-                              SHUFFLE [RS_61]
-                                PartitionCols:_col0,null sort order:a,sort order:+
-                                Merge Join Operator [MERGEJOIN_180] (rows=1414922 width=379)
-                                  Conds:RS_58._col0=RS_217._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"]
-                                <-Reducer 16 [SIMPLE_EDGE] vectorized
-                                  SHUFFLE [RS_217]
-                                    PartitionCols:_col1,null sort order:a,sort order:+
-                                    Select Operator [SEL_216] (rows=1414922 width=7)
-                                      Output:["_col0","_col1"]
-                                      Group By Operator [GBY_215] (rows=1414922 width=3)
-                                        Output:["_col0"],keys:KEY._col0
-                                      <-Reducer 15 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_30]
-                                          PartitionCols:_col0,null sort order:a,sort order:+
-                                          Group By Operator [GBY_29] (rows=143930993 width=3)
-                                            Output:["_col0"],keys:_col1
-                                            Merge Join Operator [MERGEJOIN_177] (rows=143930993 width=3)
-                                              Conds:RS_214._col0=RS_195._col0(Inner),Output:["_col1"]
-                                            <-Map 13 [SIMPLE_EDGE] vectorized
-                                              PARTITION_ONLY_SHUFFLE [RS_195]
-                                                PartitionCols:_col0,null sort order:a,sort order:+
-                                                Please refer to the previous Select Operator [SEL_192]
-                                            <-Map 21 [SIMPLE_EDGE] vectorized
-                                              SHUFFLE [RS_214]
-                                                PartitionCols:_col0,null sort order:a,sort order:+
-                                                Select Operator [SEL_213] (rows=143930993 width=7)
-                                                  Output:["_col0","_col1"]
-                                                  Filter Operator [FIL_212] (rows=143930993 width=7)
-                                                    predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter)))
-                                                    TableScan [TS_19] (rows=144002668 width=7)
-                                                      default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
-                                                  <-Reducer 17 [BROADCAST_EDGE] vectorized
-                                                    BROADCAST [RS_211]
-                                                      Group By Operator [GBY_210] (rows=1 width=12)
-                                                        Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
-                                                      <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                        PARTITION_ONLY_SHUFFLE [RS_203]
-                                                          Group By Operator [GBY_200] (rows=1 width=12)
-                                                            Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                                            Select Operator [SEL_196] (rows=201 width=4)
-                                                              Output:["_col0"]
-                                                              Please refer to the previous Select Operator [SEL_192]
-                                <-Reducer 4 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_58]
+                      Select Operator [SEL_65] (rows=1401496 width=379)
+                        Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+                        Filter Operator [FIL_64] (rows=1401496 width=379)
+                          predicate:(_col14 is not null or _col16 is not null)
+                          Merge Join Operator [MERGEJOIN_180] (rows=1401496 width=379)
+                            Conds:RS_61._col0=RS_224._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col16"]
+                          <-Reducer 19 [SIMPLE_EDGE] vectorized
+                            SHUFFLE [RS_224]
+                              PartitionCols:_col1,null sort order:a,sort order:+
+                              Select Operator [SEL_223] (rows=1401496 width=7)
+                                Output:["_col0","_col1"]
+                                Group By Operator [GBY_222] (rows=1401496 width=3)
+                                  Output:["_col0"],keys:KEY._col0
+                                <-Reducer 18 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_44]
                                    PartitionCols:_col0,null sort order:a,sort order:+
+                                    Group By Operator [GBY_43] (rows=285115246 width=3)
+                                      Output:["_col0"],keys:_col1
+                                      Merge Join Operator [MERGEJOIN_177] (rows=285115246 width=3)
+                                        Conds:RS_221._col0=RS_196._col0(Inner),Output:["_col1"]
+                                      <-Map 13 [SIMPLE_EDGE] vectorized
+                                        PARTITION_ONLY_SHUFFLE [RS_196]
+                                          PartitionCols:_col0,null sort order:+
+                                          Select Operator [SEL_191] (rows=201 width=4)
                                            Output:["_col0"]
+                                            Filter Operator [FIL_190] (rows=201 width=12)
+                                              predicate:((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null)
+                                              TableScan [TS_12] (rows=73049 width=12)
+                                                default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"]
+                                      <-Map 22 [SIMPLE_EDGE] vectorized
+                                        SHUFFLE [RS_221]
+                                          PartitionCols:_col0,null sort order:a,sort order:+
+                                          Select Operator [SEL_220] (rows=285115246 width=7)
+                                            Output:["_col0","_col1"]
+                                            Filter Operator [FIL_219] (rows=285115246 width=7)
+                                              predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter)))
+                                              TableScan [TS_33] (rows=287989836 width=7)
+                                                default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
+                                            <-Reducer 20 [BROADCAST_EDGE] vectorized
+                                              BROADCAST [RS_218]
+                                                Group By Operator [GBY_217] (rows=1 width=12)
+                                                  Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+                                                <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized
+                                                  PARTITION_ONLY_SHUFFLE [RS_203]
+                                                    Group By Operator [GBY_200] (rows=1 width=12)
+                                                      Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+                                                      Select Operator [SEL_197] (rows=201 width=4)
+                                                        Output:["_col0"]
+                                                        Please refer to the previous Select Operator [SEL_191]
+                          <-Reducer 5 [SIMPLE_EDGE]
+                            SHUFFLE [RS_61]
+                              PartitionCols:_col0,null sort order:a,sort order:+
+                              Merge Join Operator [MERGEJOIN_179] (rows=1414922 width=379)
+                                Conds:RS_58._col0=RS_216._col1(Left
Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col1,null sort order:a,sort order:+ + Select Operator [SEL_215] (rows=1414922 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_214] (rows=1414922 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_175] (rows=228127 width=375) - Conds:RS_50._col1=RS_190._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_190] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_189] (rows=1861800 width=375) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_188] (rows=1861800 width=375) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=375) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_50] - PartitionCols:_col1,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_174] (rows=224946 width=4) - Conds:RS_184._col2=RS_187._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] - PartitionCols:_col2,null sort order:a,sort order:+ - Select Operator [SEL_183] (rows=77201384 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_182] (rows=77201384 width=11) - predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=11) - default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_186] (rows=116550 width=102) - Output:["_col0"] - Filter Operator [FIL_185] (rows=116550 width=102) - predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=102) - default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] + Group By Operator [GBY_29] (rows=143930993 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_176] (rows=143930993 width=3) + Conds:RS_213._col0=RS_194._col0(Inner),Output:["_col1"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_194] + PartitionCols:_col0,null sort order:a,sort order:+ + Please refer to the previous Select Operator [SEL_191] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_213] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_212] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_211] (rows=143930993 width=7) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_19] (rows=144002668 width=7) + 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_210] + Group By Operator [GBY_209] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_202] + Group By Operator [GBY_199] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_195] (rows=201 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_191] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_178] (rows=525327388 width=375) + Conds:RS_55._col0=RS_56._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0,null sort order:a,sort order:+ + Group By Operator [GBY_54] (rows=525327388 width=3) + Output:["_col0"],keys:_col0 + Select Operator [SEL_18] (rows=525327388 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_175] (rows=525327388 width=3) + Conds:RS_208._col0=RS_192._col0(Inner),Output:["_col1"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_192] + PartitionCols:_col0,null sort order:a,sort order:+ + Please refer to the previous Select Operator [SEL_191] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_208] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_207] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_206] (rows=525327388 width=7) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_9] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_205] + Group By Operator [GBY_204] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_201] + Group By Operator [GBY_198] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_193] (rows=201 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_191] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_174] (rows=228127 width=375) + Conds:RS_50._col1=RS_189._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_188] (rows=1861800 width=375) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_187] (rows=1861800 width=375) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=375) + 
default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_173] (rows=224946 width=4) + Conds:RS_183._col2=RS_186._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_183] + PartitionCols:_col2,null sort order:a,sort order:+ + Select Operator [SEL_182] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_181] (rows=77201384 width=11) + predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_185] (rows=116550 width=102) + Output:["_col0"] + Filter Operator [FIL_184] (rows=116550 width=102) + predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=102) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] diff --git ql/src/test/results/clientpositive/perf/tez/query15.q.out ql/src/test/results/clientpositive/perf/tez/query15.q.out index 418bffddca..4c70b73c26 100644 --- ql/src/test/results/clientpositive/perf/tez/query15.q.out +++ ql/src/test/results/clientpositive/perf/tez/query15.q.out @@ -62,84 +62,82 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_99] - Limit [LIM_98] (rows=100 width=201) + File Output Operator [FS_98] + Limit [LIM_97] (rows=100 width=201) Number of rows:100 - Select Operator [SEL_97] (rows=10141 width=201) + Select Operator [SEL_96] (rows=10141 width=201) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_96] + SHUFFLE [RS_95] null sort order:z,sort order:+ - Group By Operator [GBY_95] (rows=10141 width=201) + Group By Operator [GBY_94] (rows=10141 width=201) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0,null sort order:z,sort order:+ Group By Operator [GBY_24] (rows=2403417 width=201) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col3 - Top N Key Operator [TNK_44] (rows=285117831 width=212) - keys:_col3,null sort order:z,sort order:+,top n:100 - Select Operator [SEL_23] (rows=285117831 width=212) - Output:["_col3","_col8"] - Filter Operator [FIL_22] (rows=285117831 width=212) - predicate:(_col9 or _col4 or _col5) - Merge Join Operator [MERGEJOIN_77] (rows=285117831 width=212) - Conds:RS_19._col0=RS_20._col1(Inner),Output:["_col3","_col4","_col5","_col8","_col9"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_75] (rows=80000000 width=101) - Conds:RS_80._col1=RS_83._col0(Inner),Output:["_col0","_col3","_col4","_col5"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_80] - PartitionCols:_col1,null sort order:a,sort order:+ - Select Operator [SEL_79] (rows=80000000 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_78] (rows=80000000 width=8) 
- predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
- TableScan [TS_0] (rows=80000000 width=8)
- default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"]
- <-Map 6 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_83]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_82] (rows=40000000 width=101)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_81] (rows=40000000 width=179)
- predicate:ca_address_sk is not null
- TableScan [TS_3] (rows=40000000 width=179)
- default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip"]
- <-Reducer 8 [SIMPLE_EDGE]
- SHUFFLE [RS_20]
- PartitionCols:_col1,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_76] (rows=285117831 width=119)
- Conds:RS_94._col0=RS_86._col0(Inner),Output:["_col1","_col2","_col3"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_86]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_85] (rows=130 width=4)
- Output:["_col0"]
- Filter Operator [FIL_84] (rows=130 width=12)
- predicate:((d_year = 2000) and (d_qoy = 2) and d_date_sk is not null)
- TableScan [TS_9] (rows=73049 width=12)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"]
- <-Map 7 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_94]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_93] (rows=285117831 width=123)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_92] (rows=285117831 width=119)
- predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_6] (rows=287989836 width=119)
- default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"]
- <-Reducer 10 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_91]
- Group By Operator [GBY_90] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_89]
- Group By Operator [GBY_88] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_87] (rows=130 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_85]
+ Select Operator [SEL_23] (rows=285117831 width=212)
+ Output:["_col3","_col8"]
+ Filter Operator [FIL_22] (rows=285117831 width=212)
+ predicate:(_col9 or _col4 or _col5)
+ Merge Join Operator [MERGEJOIN_76] (rows=285117831 width=212)
+ Conds:RS_19._col0=RS_20._col1(Inner),Output:["_col3","_col4","_col5","_col8","_col9"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_19]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_74] (rows=80000000 width=101)
+ Conds:RS_79._col1=RS_82._col0(Inner),Output:["_col0","_col3","_col4","_col5"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_79]
+ PartitionCols:_col1,null sort order:a,sort order:+
+ Select Operator [SEL_78] (rows=80000000 width=8)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_77] (rows=80000000 width=8)
+ predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
+ TableScan [TS_0] (rows=80000000 width=8)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"]
+ <-Map 6 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_82]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_81] (rows=40000000 width=101)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_80] (rows=40000000 width=179)
+ predicate:ca_address_sk is not null
+ TableScan [TS_3] (rows=40000000 width=179)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip"]
+ <-Reducer 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_20]
+ PartitionCols:_col1,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_75] (rows=285117831 width=119)
+ Conds:RS_93._col0=RS_85._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_85]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_84] (rows=130 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_83] (rows=130 width=12)
+ predicate:((d_year = 2000) and (d_qoy = 2) and d_date_sk is not null)
+ TableScan [TS_9] (rows=73049 width=12)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"]
+ <-Map 7 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_93]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_92] (rows=285117831 width=123)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_91] (rows=285117831 width=119)
+ predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_6] (rows=287989836 width=119)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"]
+ <-Reducer 10 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_90]
+ Group By Operator [GBY_89] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_88]
+ Group By Operator [GBY_87] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_86] (rows=130 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_84]
diff --git ql/src/test/results/clientpositive/perf/tez/query17.q.out ql/src/test/results/clientpositive/perf/tez/query17.q.out
index e7eda149e8..990e26bc80 100644
--- ql/src/test/results/clientpositive/perf/tez/query17.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query17.q.out
@@ -122,149 +122,147 @@ Stage-0
 limit:100
 Stage-1
 Reducer 7 vectorized
- File Output Operator [FS_253]
- Limit [LIM_252] (rows=100 width=466)
+ File Output Operator [FS_252]
+ Limit [LIM_251] (rows=100 width=466)
 Number of rows:100
- Select Operator [SEL_251] (rows=97302218447 width=466)
+ Select Operator [SEL_250] (rows=97302218447 width=466)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"]
 <-Reducer 6 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_250]
+ SHUFFLE [RS_249]
 null sort order:zzz,sort order:+++
- Select Operator [SEL_249] (rows=97302218447 width=466)
+ Select Operator [SEL_248] (rows=97302218447 width=466)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_248] (rows=97302218447 width=466) + Group By Operator [GBY_247] (rows=97302218447 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_50] PartitionCols:_col0, _col1, _col2,null sort order:zzz,sort order:+++ Group By Operator [GBY_49] (rows=97302218447 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_93] (rows=97302218447 width=381) - keys:_col0, _col1, _col2,null sort order:zzz,sort order:+++,top n:100 - Select Operator [SEL_47] (rows=97302218447 width=381) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Merge Join Operator [MERGEJOIN_213] (rows=97302218447 width=381) - Conds:RS_44._col3=RS_247._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_247] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_246] (rows=1704 width=90) - Output:["_col0","_col1"] - Filter Operator [FIL_245] (rows=1704 width=90) - predicate:s_store_sk is not null - TableScan [TS_32] (rows=1704 width=90) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col3,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_212] (rows=97302218447 width=299) - Conds:RS_41._col1, _col2, _col4=RS_42._col6, _col7, _col8(Inner),Output:["_col3","_col5","_col8","_col9","_col13","_col19"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col6, _col7, _col8,null sort order:aaa,sort order:+++ - Merge Join Operator [MERGEJOIN_211] (rows=10910732684 width=19) - Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col2, _col1,null sort order:aa,sort order:++ - Merge Join Operator [MERGEJOIN_209] (rows=285117831 width=11) - Conds:RS_241._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_222] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_218] (rows=304 width=4) - Output:["_col0"] - Filter Operator [FIL_215] (rows=304 width=94) - predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=94) - default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_241] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_240] (rows=285117831 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_239] (rows=285117831 
width=15) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_26_d3_d_date_sk_min) AND DynamicValue(RS_26_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_26_d3_d_date_sk_bloom_filter))) - TableScan [TS_9] (rows=287989836 width=15) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_238] - Group By Operator [GBY_237] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_228] - Group By Operator [GBY_226] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_223] (rows=304 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_218] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col1, _col2,null sort order:aa,sort order:++ - Merge Join Operator [MERGEJOIN_210] (rows=53632139 width=15) - Conds:RS_244._col0=RS_224._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_224] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_219] (rows=304 width=4) - Output:["_col0"] - Filter Operator [FIL_216] (rows=304 width=94) - predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_244] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_243] (rows=53632139 width=19) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_242] (rows=53632139 width=19) - predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_15] (rows=57591150 width=19) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col1, _col2, _col4,null sort order:aaa,sort order:+++ - Merge Join Operator [MERGEJOIN_208] (rows=501694138 width=303) - Conds:RS_38._col1=RS_236._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8","_col9"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_236] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_235] (rows=462000 width=288) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_234] (rows=462000 width=288) - predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=288) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col1,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_207] (rows=501694138 width=19) - Conds:RS_233._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_220] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_217] (rows=101 width=4) - Output:["_col0"] - Filter Operator [FIL_214] (rows=101 width=94) - 
predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_232] (rows=501694138 width=23) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_231] (rows=501694138 width=23) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=23) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_230] - Group By Operator [GBY_229] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_227] - Group By Operator [GBY_225] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=101 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_217] + Select Operator [SEL_47] (rows=97302218447 width=381) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_212] (rows=97302218447 width=381) + Conds:RS_44._col3=RS_246._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_245] (rows=1704 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_244] (rows=1704 width=90) + predicate:s_store_sk is not null + TableScan [TS_32] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col3,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_211] (rows=97302218447 width=299) + Conds:RS_41._col1, _col2, _col4=RS_42._col6, _col7, _col8(Inner),Output:["_col3","_col5","_col8","_col9","_col13","_col19"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col6, _col7, _col8,null sort order:aaa,sort order:+++ + Merge Join Operator [MERGEJOIN_210] (rows=10910732684 width=19) + Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col2, _col1,null sort order:aa,sort order:++ + Merge Join Operator [MERGEJOIN_208] (rows=285117831 width=11) + Conds:RS_240._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_221] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_217] (rows=304 width=4) + Output:["_col0"] + Filter Operator [FIL_214] (rows=304 width=94) + predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=94) + 
default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_240] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_239] (rows=285117831 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_238] (rows=285117831 width=15) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_26_d3_d_date_sk_min) AND DynamicValue(RS_26_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_26_d3_d_date_sk_bloom_filter))) + TableScan [TS_9] (rows=287989836 width=15) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_237] + Group By Operator [GBY_236] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_227] + Group By Operator [GBY_225] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_222] (rows=304 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_217] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1, _col2,null sort order:aa,sort order:++ + Merge Join Operator [MERGEJOIN_209] (rows=53632139 width=15) + Conds:RS_243._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_223] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_218] (rows=304 width=4) + Output:["_col0"] + Filter Operator [FIL_215] (rows=304 width=94) + predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_243] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_242] (rows=53632139 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_241] (rows=53632139 width=19) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_item_sk is not null and sr_ticket_number is not null) + TableScan [TS_15] (rows=57591150 width=19) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col1, _col2, _col4,null sort order:aaa,sort order:+++ + Merge Join Operator [MERGEJOIN_207] (rows=501694138 width=303) + Conds:RS_38._col1=RS_235._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8","_col9"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_235] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_234] (rows=462000 width=288) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_233] (rows=462000 width=288) + predicate:i_item_sk is not null + TableScan [TS_6] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col1,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_206] (rows=501694138 width=19) 
+ Conds:RS_232._col0=RS_219._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_219] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_216] (rows=101 width=4) + Output:["_col0"] + Filter Operator [FIL_213] (rows=101 width=94) + predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_231] (rows=501694138 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_230] (rows=501694138 width=23) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=23) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_229] + Group By Operator [GBY_228] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_226] + Group By Operator [GBY_224] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_220] (rows=101 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_216] diff --git ql/src/test/results/clientpositive/perf/tez/query27.q.out ql/src/test/results/clientpositive/perf/tez/query27.q.out index d915989654..cf0b15eb24 100644 --- ql/src/test/results/clientpositive/perf/tez/query27.q.out +++ ql/src/test/results/clientpositive/perf/tez/query27.q.out @@ -71,98 +71,96 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_126] - Limit [LIM_125] (rows=100 width=538) + File Output Operator [FS_125] + Limit [LIM_124] (rows=100 width=538) Number of rows:100 - Select Operator [SEL_124] (rows=6526254 width=538) + Select Operator [SEL_123] (rows=6526254 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] + SHUFFLE [RS_122] null sort order:zz,sort order:++ - Select Operator [SEL_122] (rows=6526254 width=538) + Select Operator [SEL_121] (rows=6526254 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_121] (rows=6526254 width=570) + Group By Operator [GBY_120] (rows=6526254 width=570) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2,null sort order:zza,sort order:+++ Group By Operator [GBY_29] (rows=13907934 width=570) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_56] (rows=4635978 width=186) - keys:_col0, _col1, 0L,null sort order:zza,sort order:+++,top n:100 - Select Operator [SEL_27] (rows=4635978 width=186) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_100] (rows=4635978 width=186) - Conds:RS_24._col1=RS_120._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_119] (rows=462000 width=104) - Output:["_col0","_col1"] - Filter Operator [FIL_118] (rows=462000 width=104) - predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=104) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col1,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_99] (rows=4635978 width=90) - Conds:RS_21._col3=RS_117._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_116] (rows=209 width=90) - Output:["_col0","_col1"] - Filter Operator [FIL_115] (rows=209 width=90) - predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) - TableScan [TS_9] (rows=1704 width=90) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col3,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_98] (rows=4635978 width=4) - Conds:RS_18._col0=RS_114._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_113] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_112] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_97] (rows=4635978 width=4) - Conds:RS_111._col2=RS_103._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_103] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_102] (rows=14776 width=4) - Output:["_col0"] - Filter Operator [FIL_101] (rows=14776 width=268) - predicate:((cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and (cd_gender = 'M') and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=268) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] - PartitionCols:_col2,null sort order:a,sort order:+ - Select Operator [SEL_110] (rows=501690006 width=340) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_109] (rows=501690006 width=340) - predicate:(ss_cdemo_sk is 
not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=340) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_108] - Group By Operator [GBY_107] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=14776 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] + Select Operator [SEL_27] (rows=4635978 width=186) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_99] (rows=4635978 width=186) + Conds:RS_24._col1=RS_119._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_119] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_118] (rows=462000 width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_117] (rows=462000 width=104) + predicate:i_item_sk is not null + TableScan [TS_12] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col1,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_98] (rows=4635978 width=90) + Conds:RS_21._col3=RS_116._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_116] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_115] (rows=209 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_114] (rows=209 width=90) + predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) + TableScan [TS_9] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col3,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_97] (rows=4635978 width=4) + Conds:RS_18._col0=RS_113._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_112] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_111] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_96] (rows=4635978 width=4) + Conds:RS_110._col2=RS_102._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE 
[RS_102] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_101] (rows=14776 width=4) + Output:["_col0"] + Filter Operator [FIL_100] (rows=14776 width=268) + predicate:((cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and (cd_gender = 'M') and cd_demo_sk is not null) + TableScan [TS_3] (rows=1861800 width=268) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] + PartitionCols:_col2,null sort order:a,sort order:+ + Select Operator [SEL_109] (rows=501690006 width=340) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_108] (rows=501690006 width=340) + predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=340) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_107] + Group By Operator [GBY_106] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_105] + Group By Operator [GBY_104] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_103] (rows=14776 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_101] diff --git ql/src/test/results/clientpositive/perf/tez/query35.q.out ql/src/test/results/clientpositive/perf/tez/query35.q.out index 1936f22b61..cf258ddad3 100644 --- ql/src/test/results/clientpositive/perf/tez/query35.q.out +++ ql/src/test/results/clientpositive/perf/tez/query35.q.out @@ -153,190 +153,188 @@ Stage-0 limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_229] - Limit [LIM_228] (rows=1 width=352) + File Output Operator [FS_228] + Limit [LIM_227] (rows=1 width=352) Number of rows:100 - Select Operator [SEL_227] (rows=1 width=352) + Select Operator [SEL_226] (rows=1 width=352) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] + SHUFFLE [RS_225] null sort order:zzzzzz,sort order:++++++ - Select Operator [SEL_225] (rows=1 width=352) + Select Operator [SEL_224] (rows=1 width=352) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_224] (rows=1 width=336) + Group By Operator [GBY_223] (rows=1 width=336) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","count(VALUE._col2)","max(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","max(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","max(VALUE._col9)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_67] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5,null sort order:zzzzzz,sort order:++++++ Group By Operator [GBY_66] (rows=2 width=336) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","sum(_col8)","count(_col8)","max(_col8)","sum(_col9)","count(_col9)","max(_col9)","sum(_col10)","count(_col10)","max(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10 - Top N Key Operator [TNK_104] (rows=1401496 width=276) - keys:_col4, _col6, _col7, _col8, _col9, _col10,null sort order:zzzzzz,sort order:++++++,top n:100 - Select Operator [SEL_65] (rows=1401496 width=276) - Output:["_col4","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_64] (rows=1401496 width=276) - predicate:(_col11 is not null or _col13 is not null) - Merge Join Operator [MERGEJOIN_182] (rows=1401496 width=276) - Conds:RS_61._col0=RS_223._col1(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col13"] - <-Reducer 5 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_61] - PartitionCols:_col0,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_181] (rows=1414922 width=276) - Conds:RS_58._col0=RS_215._col1(Left Outer),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] - PartitionCols:_col1,null sort order:a,sort order:+ - Select Operator [SEL_214] (rows=1414922 width=7) - Output:["_col0","_col1"] - Group By Operator [GBY_213] (rows=1414922 width=3) - Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0,null sort order:a,sort order:+ - Group By Operator [GBY_29] (rows=143930993 width=3) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_178] (rows=143930993 width=3) - Conds:RS_212._col0=RS_196._col0(Inner),Output:["_col1"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_196] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_193] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_192] (rows=652 width=12) - predicate:((d_year = 1999) and (d_qoy < 4) and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_211] (rows=143930993 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_210] (rows=143930993 width=7) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_19] (rows=144002668 width=7) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST 
[RS_209] - Group By Operator [GBY_208] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] - Group By Operator [GBY_200] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_197] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_193] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col0,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_180] (rows=525327388 width=272) - Conds:RS_55._col0=RS_56._col0(Left Semi),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_56] + Select Operator [SEL_65] (rows=1401496 width=276) + Output:["_col4","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_64] (rows=1401496 width=276) + predicate:(_col11 is not null or _col13 is not null) + Merge Join Operator [MERGEJOIN_181] (rows=1401496 width=276) + Conds:RS_61._col0=RS_222._col1(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col13"] + <-Reducer 5 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_61] + PartitionCols:_col0,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_180] (rows=1414922 width=276) + Conds:RS_58._col0=RS_214._col1(Left Outer),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] + PartitionCols:_col1,null sort order:a,sort order:+ + Select Operator [SEL_213] (rows=1414922 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_212] (rows=1414922 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0,null sort order:a,sort order:+ - Group By Operator [GBY_54] (rows=525327388 width=3) - Output:["_col0"],keys:_col0 - Select Operator [SEL_18] (rows=525327388 width=3) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_177] (rows=525327388 width=3) - Conds:RS_207._col0=RS_194._col0(Inner),Output:["_col1"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_194] - PartitionCols:_col0,null sort order:a,sort order:+ - Please refer to the previous Select Operator [SEL_193] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_207] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_206] (rows=525327388 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_205] (rows=525327388 width=7) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_9] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_204] - Group By Operator [GBY_203] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] - Group By Operator [GBY_199] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select 
Operator [SEL_195] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_193] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col0,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_176] (rows=78293105 width=272) - Conds:RS_50._col1=RS_191._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_190] (rows=1861800 width=186) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_189] (rows=1861800 width=186) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=186) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_50] - PartitionCols:_col1,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_175] (rows=77201384 width=93) - Conds:RS_185._col2=RS_188._col0(Inner),Output:["_col0","_col1","_col4"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] - PartitionCols:_col2,null sort order:a,sort order:+ - Select Operator [SEL_184] (rows=77201384 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_183] (rows=77201384 width=11) - predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=11) - default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_187] (rows=40000000 width=90) - Output:["_col0","_col1"] - Filter Operator [FIL_186] (rows=40000000 width=90) - predicate:ca_address_sk is not null - TableScan [TS_3] (rows=40000000 width=90) - default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] - PartitionCols:_col1,null sort order:a,sort order:+ - Select Operator [SEL_222] (rows=1401496 width=7) - Output:["_col0","_col1"] - Group By Operator [GBY_221] (rows=1401496 width=3) - Output:["_col0"],keys:KEY._col0 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_44] + Group By Operator [GBY_29] (rows=143930993 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_177] (rows=143930993 width=3) + Conds:RS_211._col0=RS_195._col0(Inner),Output:["_col1"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_192] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_191] (rows=652 width=12) + predicate:((d_year = 1999) and (d_qoy < 4) and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_211] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_210] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_209] (rows=143930993 width=7) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, 
DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_19] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_208] + Group By Operator [GBY_207] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_201] + Group By Operator [GBY_199] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_196] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_192] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_179] (rows=525327388 width=272) + Conds:RS_55._col0=RS_56._col0(Left Semi),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0,null sort order:a,sort order:+ + Group By Operator [GBY_54] (rows=525327388 width=3) + Output:["_col0"],keys:_col0 + Select Operator [SEL_18] (rows=525327388 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_176] (rows=525327388 width=3) + Conds:RS_206._col0=RS_193._col0(Inner),Output:["_col1"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_193] + PartitionCols:_col0,null sort order:a,sort order:+ + Please refer to the previous Select Operator [SEL_192] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_206] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_205] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_204] (rows=525327388 width=7) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_9] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_203] + Group By Operator [GBY_202] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_200] + Group By Operator [GBY_198] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_194] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_192] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_55] PartitionCols:_col0,null sort order:a,sort order:+ - Group By Operator [GBY_43] (rows=285115246 width=3) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_179] (rows=285115246 width=3) - Conds:RS_220._col0=RS_198._col0(Inner),Output:["_col1"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] - PartitionCols:_col0,null sort order:a,sort order:+ - Please refer to the previous Select Operator [SEL_193] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_219] (rows=285115246 width=7) - 
Output:["_col0","_col1"] - Filter Operator [FIL_218] (rows=285115246 width=7) - predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_ship_customer_sk BETWEEN DynamicValue(RS_61_c_c_customer_sk_min) AND DynamicValue(RS_61_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_61_c_c_customer_sk_bloom_filter))) - TableScan [TS_33] (rows=287989836 width=7) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_167] - Group By Operator [GBY_166] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_165] (rows=1414922 width=4) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_181] + Merge Join Operator [MERGEJOIN_175] (rows=78293105 width=272) + Conds:RS_50._col1=RS_190._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_189] (rows=1861800 width=186) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_188] (rows=1861800 width=186) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=186) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_174] (rows=77201384 width=93) + Conds:RS_184._col2=RS_187._col0(Inner),Output:["_col0","_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_184] + PartitionCols:_col2,null sort order:a,sort order:+ + Select Operator [SEL_183] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_182] (rows=77201384 width=11) + predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_187] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_186] (rows=40000000 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_185] (rows=40000000 width=90) + predicate:ca_address_sk is not null + TableScan [TS_3] (rows=40000000 width=90) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_222] + PartitionCols:_col1,null sort order:a,sort order:+ + Select Operator [SEL_221] (rows=1401496 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_220] (rows=1401496 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0,null sort order:a,sort order:+ + Group By Operator [GBY_43] (rows=285115246 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_178] (rows=285115246 
width=3) + Conds:RS_219._col0=RS_197._col0(Inner),Output:["_col1"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_197] + PartitionCols:_col0,null sort order:a,sort order:+ + Please refer to the previous Select Operator [SEL_192] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_219] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_218] (rows=285115246 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_217] (rows=285115246 width=7) + predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_ship_customer_sk BETWEEN DynamicValue(RS_61_c_c_customer_sk_min) AND DynamicValue(RS_61_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_61_c_c_customer_sk_bloom_filter))) + TableScan [TS_33] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_216] + Group By Operator [GBY_215] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_166] + Group By Operator [GBY_165] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_164] (rows=1414922 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_180] diff --git ql/src/test/results/clientpositive/perf/tez/query40.q.out ql/src/test/results/clientpositive/perf/tez/query40.q.out index 61f3d7c748..6204c4e4f0 100644 --- ql/src/test/results/clientpositive/perf/tez/query40.q.out +++ ql/src/test/results/clientpositive/perf/tez/query40.q.out @@ -81,96 +81,94 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_126] - Limit [LIM_125] (rows=100 width=410) + File Output Operator [FS_125] + Limit [LIM_124] (rows=100 width=410) Number of rows:100 - Select Operator [SEL_124] (rows=769995 width=410) + Select Operator [SEL_123] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] + SHUFFLE [RS_122] null sort order:zz,sort order:++ - Group By Operator [GBY_122] (rows=769995 width=410) + Group By Operator [GBY_121] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1,null sort order:zz,sort order:++ Group By Operator [GBY_29] (rows=51819042 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 - Top N Key Operator [TNK_55] (rows=51819042 width=302) - keys:_col0, _col1,null sort order:zz,sort order:++,top n:100 - Select Operator [SEL_27] (rows=51819042 width=302) - Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_101] (rows=51819042 width=302) - Conds:RS_24._col1=RS_121._col0(Inner),Output:["_col4","_col7","_col9","_col10","_col12","_col14"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_121] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_120] (rows=27 width=90) - Output:["_col0","_col1"] - Filter Operator [FIL_119] (rows=27 width=90) - predicate:w_warehouse_sk is not null - TableScan [TS_12] (rows=27 width=90) - 
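The hunks that follow all show the same mechanical change in the golden outputs: with hive.optimize.topnkey disabled, the Top N Key Operator (TNK_*) lines drop out of the printed plans and the ids of every operator allocated after it shift down by one. The optimization can still be exercised per session; a minimal illustrative sketch follows (the query and columns are mine, not taken from this patch):

    -- Hypothetical repro, not part of this diff: re-enable the optimizer
    -- for one session and inspect a GROUP BY + ORDER BY + LIMIT plan.
    set hive.optimize.topnkey=true;
    explain
    select ss_store_sk, sum(ss_sales_price)
    from store_sales
    group by ss_store_sk
    order by ss_store_sk
    limit 100;

With the flag on, the printed tree should again contain a Top N Key Operator row (keys:_col0, top n:100) feeding the Group By Operator, as in the removed lines below.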
diff --git ql/src/test/results/clientpositive/perf/tez/query40.q.out ql/src/test/results/clientpositive/perf/tez/query40.q.out
index 61f3d7c748..6204c4e4f0 100644
--- ql/src/test/results/clientpositive/perf/tez/query40.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query40.q.out
@@ -81,96 +81,94 @@ Stage-0
 limit:100
 Stage-1 Reducer 7 vectorized
- File Output Operator [FS_126]
- Limit [LIM_125] (rows=100 width=410)
+ File Output Operator [FS_125]
+ Limit [LIM_124] (rows=100 width=410)
 Number of rows:100
- Select Operator [SEL_124] (rows=769995 width=410)
+ Select Operator [SEL_123] (rows=769995 width=410)
 Output:["_col0","_col1","_col2","_col3"]
 <-Reducer 6 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_123]
+ SHUFFLE [RS_122]
 null sort order:zz,sort order:++
- Group By Operator [GBY_122] (rows=769995 width=410)
+ Group By Operator [GBY_121] (rows=769995 width=410)
 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1
 <-Reducer 5 [SIMPLE_EDGE]
 SHUFFLE [RS_30]
 PartitionCols:_col0, _col1,null sort order:zz,sort order:++
 Group By Operator [GBY_29] (rows=51819042 width=410)
 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1
- Top N Key Operator [TNK_55] (rows=51819042 width=302)
- keys:_col0, _col1,null sort order:zz,sort order:++,top n:100
- Select Operator [SEL_27] (rows=51819042 width=302)
- Output:["_col0","_col1","_col2","_col3"]
- Merge Join Operator [MERGEJOIN_101] (rows=51819042 width=302)
- Conds:RS_24._col1=RS_121._col0(Inner),Output:["_col4","_col7","_col9","_col10","_col12","_col14"]
- <-Map 12 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_121]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_120] (rows=27 width=90)
- Output:["_col0","_col1"]
- Filter Operator [FIL_119] (rows=27 width=90)
- predicate:w_warehouse_sk is not null
- TableScan [TS_12] (rows=27 width=90)
- default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_state"]
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_24]
- PartitionCols:_col1,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_100] (rows=51819042 width=220)
- Conds:RS_21._col2=RS_104._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col10","_col12"]
- <-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_104]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_103] (rows=51333 width=104)
- Output:["_col0","_col1"]
- Filter Operator [FIL_102] (rows=51333 width=215)
- predicate:(i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null)
- TableScan [TS_9] (rows=462000 width=215)
- default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_21]
- PartitionCols:_col2,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_99] (rows=466374405 width=171)
- Conds:RS_18._col0=RS_118._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9","_col10"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_118]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_117] (rows=8116 width=12)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_116] (rows=8116 width=98)
- predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null)
- TableScan [TS_6] (rows=73049 width=98)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_18]
- PartitionCols:_col0,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_98] (rows=466374405 width=167)
- Conds:RS_112._col2, _col3=RS_115._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_112]
- PartitionCols:_col2, _col3,null sort order:aa,sort order:++
- Select Operator [SEL_111] (rows=285115816 width=127)
- Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_110] (rows=285115816 width=127)
- predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_22_item_i_item_sk_min) AND DynamicValue(RS_22_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_22_item_i_item_sk_bloom_filter)))
- TableScan [TS_0] (rows=287989836 width=127)
- default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_order_number","cs_sales_price"]
- <-Reducer 11 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_109]
- Group By Operator [GBY_108] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_107]
- Group By Operator [GBY_106] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_105] (rows=51333 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_103]
- <-Map 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_115]
- PartitionCols:_col0, _col1,null sort order:aa,sort order:++
- Select Operator [SEL_114] (rows=28798881 width=117)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_113] (rows=28798881 width=117)
- predicate:(cr_order_number is not null and cr_item_sk is not null)
- TableScan [TS_3] (rows=28798881 width=117)
- default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash"]
+ Select Operator [SEL_27] (rows=51819042 width=302)
+ Output:["_col0","_col1","_col2","_col3"]
+ Merge Join Operator [MERGEJOIN_100] (rows=51819042 width=302)
+ Conds:RS_24._col1=RS_120._col0(Inner),Output:["_col4","_col7","_col9","_col10","_col12","_col14"]
+ <-Map 12 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_120]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_119] (rows=27 width=90)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_118] (rows=27 width=90)
+ predicate:w_warehouse_sk is not null
+ TableScan [TS_12] (rows=27 width=90)
+ default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_state"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_24]
+ PartitionCols:_col1,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_99] (rows=51819042 width=220)
+ Conds:RS_21._col2=RS_103._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col10","_col12"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_103]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_102] (rows=51333 width=104)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_101] (rows=51333 width=215)
+ predicate:(i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null)
+ TableScan [TS_9] (rows=462000 width=215)
+ default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_21]
+ PartitionCols:_col2,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_98] (rows=466374405 width=171)
+ Conds:RS_18._col0=RS_117._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9","_col10"]
+ <-Map 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_117]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_116] (rows=8116 width=12)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_115] (rows=8116 width=98)
+ predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null)
+ TableScan [TS_6] (rows=73049 width=98)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_97] (rows=466374405 width=167)
+ Conds:RS_111._col2, _col3=RS_114._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_111]
+ PartitionCols:_col2, _col3,null sort order:aa,sort order:++
+ Select Operator [SEL_110] (rows=285115816 width=127)
+ Output:["_col0","_col1","_col2","_col3","_col4"]
+ Filter Operator [FIL_109] (rows=285115816 width=127)
+ predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_22_item_i_item_sk_min) AND DynamicValue(RS_22_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_22_item_i_item_sk_bloom_filter)))
+ TableScan [TS_0] (rows=287989836 width=127)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_order_number","cs_sales_price"]
+ <-Reducer 11 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_108]
+ Group By Operator [GBY_107] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_106]
+ Group By Operator [GBY_105] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_104] (rows=51333 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_102]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_114]
+ PartitionCols:_col0, _col1,null sort order:aa,sort order:++
+ Select Operator [SEL_113] (rows=28798881 width=117)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_112] (rows=28798881 width=117)
+ predicate:(cr_order_number is not null and cr_item_sk is not null)
+ TableScan [TS_3] (rows=28798881 width=117)
+ default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash"]
diff --git ql/src/test/results/clientpositive/perf/tez/query43.q.out ql/src/test/results/clientpositive/perf/tez/query43.q.out
index 2e6f86c0a0..34585e5152 100644
--- ql/src/test/results/clientpositive/perf/tez/query43.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query43.q.out
@@ -57,68 +57,66 @@ Stage-0
 limit:100
 Stage-1 Reducer 5 vectorized
- File Output Operator [FS_74]
- Limit [LIM_73] (rows=100 width=972)
+ File Output Operator [FS_73]
+ Limit [LIM_72] (rows=100 width=972)
 Number of rows:100
- Select Operator [SEL_72] (rows=3751 width=972)
+ Select Operator [SEL_71] (rows=3751 width=972)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
 <-Reducer 4 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_71]
+ SHUFFLE [RS_70]
 null sort order:zzzzzzzzz,sort order:+++++++++
- Group By Operator [GBY_70] (rows=3751 width=972)
+ Group By Operator [GBY_69] (rows=3751 width=972)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1
 <-Reducer 3 [SIMPLE_EDGE]
 SHUFFLE [RS_18]
 PartitionCols:_col0, _col1,null sort order:zz,sort order:++
 Group By Operator [GBY_17] (rows=2486913 width=972)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1
- Top N Key Operator [TNK_33] (rows=525329897 width=322)
- keys:_col0, _col1,null sort order:zz,sort order:++,top n:100
- Select Operator [SEL_15] (rows=525329897 width=322)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
- Merge Join Operator [MERGEJOIN_55] (rows=525329897 width=322)
- Conds:RS_12._col1=RS_69._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col12","_col13"]
- <-Map 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_69]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_68] (rows=341 width=192)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_67] (rows=341 width=303)
- predicate:((s_gmt_offset = -6) and s_store_sk is not null)
- TableScan [TS_6] (rows=1704 width=303)
- default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name","s_gmt_offset"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_12]
- PartitionCols:_col1,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_54] (rows=525329897 width=138)
- Conds:RS_66._col0=RS_58._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10"]
- <-Map 6 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_58]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_57] (rows=652 width=32)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
- Filter Operator [FIL_56] (rows=652 width=99)
- predicate:((d_year = 1998) and d_date_sk is not null)
- TableScan [TS_3] (rows=73049 width=99)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_day_name"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_66]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_65] (rows=525329897 width=114)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_64] (rows=525329897 width=114)
- predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_0] (rows=575995635 width=114)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"]
- <-Reducer 7 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_63]
- Group By Operator [GBY_62] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_61]
- Group By Operator [GBY_60] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_59] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_57]
+ Select Operator [SEL_15] (rows=525329897 width=322)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+ Merge Join Operator [MERGEJOIN_54] (rows=525329897 width=322)
+ Conds:RS_12._col1=RS_68._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col12","_col13"]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_68]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_67] (rows=341 width=192)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_66] (rows=341 width=303)
+ predicate:((s_gmt_offset = -6) and s_store_sk is not null)
+ TableScan [TS_6] (rows=1704 width=303)
+ default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name","s_gmt_offset"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_12]
+ PartitionCols:_col1,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_53] (rows=525329897 width=138)
+ Conds:RS_65._col0=RS_57._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10"]
+ <-Map 6 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_57]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_56] (rows=652 width=32)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+ Filter Operator [FIL_55] (rows=652 width=99)
+ predicate:((d_year = 1998) and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=99)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_day_name"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_65]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_64] (rows=525329897 width=114)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_63] (rows=525329897 width=114)
+ predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_0] (rows=575995635 width=114)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"]
+ <-Reducer 7 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_62]
+ Group By Operator [GBY_61] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_60]
+ Group By Operator [GBY_59] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_58] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_56]
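In the removed query43 lines above, Top N Key Operator [TNK_33] carried keys:_col0, _col1 and top n:100: conceptually it remembers the best 100 key prefixes seen so far and drops rows that can no longer reach the final top 100 before they are shuffled into the aggregation. A rough sketch of the query shape that used to trigger it (simplified and hypothetical; the real query43 text is not reproduced in this diff):

    -- Shape only: aggregate whose ORDER BY keys are a prefix of the
    -- GROUP BY keys, capped by LIMIT, over the TPC-DS test schema.
    select s_store_name, s_store_id, sum(ss_sales_price)
    from store_sales
    join store on ss_store_sk = s_store_sk
    group by s_store_name, s_store_id
    order by s_store_name, s_store_id
    limit 100;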
diff --git ql/src/test/results/clientpositive/perf/tez/query45.q.out ql/src/test/results/clientpositive/perf/tez/query45.q.out
index ab7181d7a4..b47e021e5e 100644
--- ql/src/test/results/clientpositive/perf/tez/query45.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query45.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[133][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product
+Warning: Shuffle Join MERGEJOIN[132][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product
 PREHOOK: query: explain
 select ca_zip, ca_county, sum(ws_sales_price)
 from web_sales, customer, customer_address, date_dim, item
@@ -70,139 +70,137 @@ Stage-0
 limit:100
 Stage-1 Reducer 6 vectorized
- File Output Operator [FS_171]
- Limit [LIM_170] (rows=100 width=299)
+ File Output Operator [FS_170]
+ Limit [LIM_169] (rows=100 width=299)
 Number of rows:100
- Select Operator [SEL_169] (rows=17401956 width=299)
+ Select Operator [SEL_168] (rows=17401956 width=299)
 Output:["_col0","_col1","_col2"]
 <-Reducer 5 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_168]
+ SHUFFLE [RS_167]
 null sort order:zz,sort order:++
- Group By Operator [GBY_167] (rows=17401956 width=299)
+ Group By Operator [GBY_166] (rows=17401956 width=299)
 Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
 <-Reducer 4 [SIMPLE_EDGE]
 SHUFFLE [RS_53]
 PartitionCols:_col0, _col1,null sort order:zz,sort order:++
 Group By Operator [GBY_52] (rows=143930993 width=299)
 Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col7
- Top N Key Operator [TNK_82] (rows=143930993 width=310)
- keys:_col8, _col7,null sort order:zz,sort order:++,top n:100
- Select Operator [SEL_51] (rows=143930993 width=310)
- Output:["_col3","_col7","_col8"]
- Filter Operator [FIL_50] (rows=143930993 width=310)
- predicate:(((_col14 <> 0L) and _col16 is not null) or (substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792'))
- Select Operator [SEL_49] (rows=143930993 width=310)
- Output:["_col3","_col7","_col8","_col14","_col16"]
- Merge Join Operator [MERGEJOIN_133] (rows=143930993 width=310)
- Conds:(Inner),Output:["_col3","_col4","_col8","_col12","_col16"]
- <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_166]
- Group By Operator [GBY_165] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_164]
- Group By Operator [GBY_163] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_162] (rows=11 width=4)
- Filter Operator [FIL_161] (rows=11 width=4)
- predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
- TableScan [TS_33] (rows=462000 width=4)
- default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk"]
- <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_46]
- Merge Join Operator [MERGEJOIN_132] (rows=143930993 width=302)
- Conds:RS_43._col0=RS_44._col6(Inner),Output:["_col3","_col4","_col8","_col12"]
- <-Reducer 10 [SIMPLE_EDGE]
- SHUFFLE [RS_44]
- PartitionCols:_col6,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_131] (rows=143930993 width=119)
- Conds:RS_29._col0=RS_30._col1(Inner),Output:["_col3","_col6","_col7"]
- <-Reducer 13 [SIMPLE_EDGE]
- SHUFFLE [RS_30]
- PartitionCols:_col1,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_130] (rows=143930993 width=119)
- Conds:RS_160._col0=RS_152._col0(Inner),Output:["_col1","_col2","_col3"]
- <-Map 14 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_152]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_151] (rows=130 width=12)
- Output:["_col0"]
- Filter Operator [FIL_150] (rows=130 width=12)
- predicate:((d_year = 2000) and (d_qoy = 2) and d_date_sk is not null)
- TableScan [TS_19] (rows=73049 width=12)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"]
- <-Map 12 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_160]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_159] (rows=143930993 width=123)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_158] (rows=143930993 width=123)
- predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_23_date_dim_d_date_sk_min) AND DynamicValue(RS_23_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_23_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_16] (rows=144002668 width=123)
- default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_sales_price"]
- <-Reducer 15 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_157]
- Group By Operator [GBY_156] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_155]
- Group By Operator [GBY_154] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_153] (rows=130 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_151]
- <-Reducer 9 [SIMPLE_EDGE]
- SHUFFLE [RS_29]
- PartitionCols:_col0,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_129] (rows=462007 width=4)
- Conds:RS_144._col1=RS_149._col0(Left Outer),Output:["_col0","_col3"]
- <-Map 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_144]
- PartitionCols:_col1,null sort order:a,sort order:+
- Select Operator [SEL_142] (rows=462000 width=104)
- Output:["_col0","_col1"]
- Filter Operator [FIL_140] (rows=462000 width=104)
- predicate:i_item_sk is not null
- TableScan [TS_6] (rows=462000 width=104)
- default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"]
- <-Reducer 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_149]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_148] (rows=5 width=104)
- Output:["_col0","_col1"]
- Group By Operator [GBY_147] (rows=5 width=100)
- Output:["_col0"],keys:KEY._col0
- <-Map 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_146]
- PartitionCols:_col0,null sort order:a,sort order:+
- Group By Operator [GBY_145] (rows=5 width=100)
- Output:["_col0"],keys:i_item_id
- Select Operator [SEL_143] (rows=11 width=104)
- Output:["i_item_id"]
- Filter Operator [FIL_141] (rows=11 width=104)
- predicate:((i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) and i_item_id is not null)
- Please refer to the previous TableScan [TS_6]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_43]
- PartitionCols:_col0,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_128] (rows=80000000 width=191)
- Conds:RS_136._col1=RS_139._col0(Inner),Output:["_col0","_col3","_col4"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_136]
- PartitionCols:_col1,null sort order:a,sort order:+
- Select Operator [SEL_135] (rows=80000000 width=8)
- Output:["_col0","_col1"]
- Filter Operator [FIL_134] (rows=80000000 width=8)
- predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
- TableScan [TS_0] (rows=80000000 width=8)
- default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"]
- <-Map 7 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_139]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_138] (rows=40000000 width=191)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_137] (rows=40000000 width=191)
- predicate:ca_address_sk is not null
- TableScan [TS_3] (rows=40000000 width=191)
- default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_zip"]
+ Select Operator [SEL_51] (rows=143930993 width=310)
+ Output:["_col3","_col7","_col8"]
+ Filter Operator [FIL_50] (rows=143930993 width=310)
+ predicate:(((_col14 <> 0L) and _col16 is not null) or (substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792'))
+ Select Operator [SEL_49] (rows=143930993 width=310)
+ Output:["_col3","_col7","_col8","_col14","_col16"]
+ Merge Join Operator [MERGEJOIN_132] (rows=143930993 width=310)
+ Conds:(Inner),Output:["_col3","_col4","_col8","_col12","_col16"]
+ <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_165]
+ Group By Operator [GBY_164] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_163]
+ Group By Operator [GBY_162] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Select Operator [SEL_161] (rows=11 width=4)
+ Filter Operator [FIL_160] (rows=11 width=4)
+ predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
+ TableScan [TS_33] (rows=462000 width=4)
+ default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk"]
+ <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_46]
+ Merge Join Operator [MERGEJOIN_131] (rows=143930993 width=302)
+ Conds:RS_43._col0=RS_44._col6(Inner),Output:["_col3","_col4","_col8","_col12"]
+ <-Reducer 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_44]
+ PartitionCols:_col6,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_130] (rows=143930993 width=119)
+ Conds:RS_29._col0=RS_30._col1(Inner),Output:["_col3","_col6","_col7"]
+ <-Reducer 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col1,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_129] (rows=143930993 width=119)
+ Conds:RS_159._col0=RS_151._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 14 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_151]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_150] (rows=130 width=12)
+ Output:["_col0"]
+ Filter Operator [FIL_149] (rows=130 width=12)
+ predicate:((d_year = 2000) and (d_qoy = 2) and d_date_sk is not null)
+ TableScan [TS_19] (rows=73049 width=12)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"]
+ <-Map 12 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_159]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_158] (rows=143930993 width=123)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_157] (rows=143930993 width=123)
+ predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_23_date_dim_d_date_sk_min) AND DynamicValue(RS_23_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_23_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_16] (rows=144002668 width=123)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_sales_price"]
+ <-Reducer 15 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_156]
+ Group By Operator [GBY_155] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_154]
+ Group By Operator [GBY_153] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_152] (rows=130 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_150]
+ <-Reducer 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_29]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_128] (rows=462007 width=4)
+ Conds:RS_143._col1=RS_148._col0(Left Outer),Output:["_col0","_col3"]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_143]
+ PartitionCols:_col1,null sort order:a,sort order:+
+ Select Operator [SEL_141] (rows=462000 width=104)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_139] (rows=462000 width=104)
+ predicate:i_item_sk is not null
+ TableScan [TS_6] (rows=462000 width=104)
+ default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_148]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_147] (rows=5 width=104)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_146] (rows=5 width=100)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_145]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Group By Operator [GBY_144] (rows=5 width=100)
+ Output:["_col0"],keys:i_item_id
+ Select Operator [SEL_142] (rows=11 width=104)
+ Output:["i_item_id"]
+ Filter Operator [FIL_140] (rows=11 width=104)
+ predicate:((i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) and i_item_id is not null)
+ Please refer to the previous TableScan [TS_6]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_43]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_127] (rows=80000000 width=191)
+ Conds:RS_135._col1=RS_138._col0(Inner),Output:["_col0","_col3","_col4"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_135]
+ PartitionCols:_col1,null sort order:a,sort order:+
+ Select Operator [SEL_134] (rows=80000000 width=8)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_133] (rows=80000000 width=8)
+ predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
+ TableScan [TS_0] (rows=80000000 width=8)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"]
+ <-Map 7 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_138]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_137] (rows=40000000 width=191)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_136] (rows=40000000 width=191)
+ predicate:ca_address_sk is not null
+ TableScan [TS_3] (rows=40000000 width=191)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_zip"]
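Note that in query45 only the operator id in the warning changed (MERGEJOIN[133] to MERGEJOIN[132]); the cross product itself is pre-existing and benign, because one join side is a single-row count, as the Reducer 17 subtree above shows. A simplified, assumed illustration of that shape:

    -- A keyless join against a one-row aggregate triggers the same
    -- "cross product" warning, but pairs each row with exactly one row.
    select ws.ws_sales_price, c.cnt
    from web_sales ws
    cross join (select count(*) as cnt
                from item
                where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)) c
    limit 10;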
diff --git ql/src/test/results/clientpositive/perf/tez/query50.q.out ql/src/test/results/clientpositive/perf/tez/query50.q.out
index 2a287ce6d3..1767443a54 100644
--- ql/src/test/results/clientpositive/perf/tez/query50.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query50.q.out
@@ -139,85 +139,83 @@ Stage-0
 limit:100
 Stage-1 Reducer 7 vectorized
- File Output Operator [FS_140]
- Limit [LIM_139] (rows=100 width=858)
+ File Output Operator [FS_139]
+ Limit [LIM_138] (rows=100 width=858)
 Number of rows:100
- Select Operator [SEL_138] (rows=478292911 width=857)
+ Select Operator [SEL_137] (rows=478292911 width=857)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"]
 <-Reducer 6 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_137]
+ SHUFFLE [RS_136]
 null sort order:zzzzzzzzzz,sort order:++++++++++
- Group By Operator [GBY_136] (rows=478292911 width=857)
+ Group By Operator [GBY_135] (rows=478292911 width=857)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9
 <-Reducer 5 [SIMPLE_EDGE]
 SHUFFLE [RS_30]
 PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9,null sort order:zzzzzzzzzz,sort order:++++++++++
 Group By Operator [GBY_29] (rows=478292911 width=857)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Top N Key Operator [TNK_56] (rows=478292911 width=825)
- keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9,null sort order:zzzzzzzzzz,sort order:++++++++++,top n:100
- Select Operator [SEL_27] (rows=478292911 width=825)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"]
- Merge Join Operator [MERGEJOIN_120] (rows=478292911 width=825)
- Conds:RS_24._col8=RS_135._col0(Inner),Output:["_col0","_col5","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"]
- <-Map 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_135]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_134] (rows=1704 width=821)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"]
- Filter Operator [FIL_133] (rows=1704 width=821)
- predicate:s_store_sk is not null
- TableScan [TS_12] (rows=1704 width=821)
- default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"]
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_24]
- PartitionCols:_col8,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_119] (rows=478292911 width=11)
- Conds:RS_21._col5=RS_132._col0(Inner),Output:["_col0","_col5","_col8"]
- <-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_132]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_131] (rows=73049 width=4)
- Output:["_col0"]
- Filter Operator [FIL_130] (rows=73049 width=4)
- predicate:d_date_sk is not null
- TableScan [TS_9] (rows=73049 width=4)
- default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_21]
- PartitionCols:_col5,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_118] (rows=478292911 width=11)
- Conds:RS_18._col1, _col2, _col3=RS_129._col1, _col2, _col4(Inner),Output:["_col0","_col5","_col8"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_129]
- PartitionCols:_col1, _col2, _col4,null sort order:aaa,sort order:+++
- Select Operator [SEL_128] (rows=501694138 width=19)
- Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_127] (rows=501694138 width=19)
- predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number is not null and ss_item_sk is not null)
- TableScan [TS_6] (rows=575995635 width=19)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_18]
- PartitionCols:_col1, _col2, _col3,null sort order:aaa,sort order:+++
- Merge Join Operator [MERGEJOIN_117] (rows=53632139 width=15)
- Conds:RS_123._col0=RS_126._col0(Inner),Output:["_col0","_col1","_col2","_col3"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_123]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_122] (rows=53632139 width=15)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_121] (rows=53632139 width=15)
- predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null and sr_item_sk is not null)
- TableScan [TS_0] (rows=57591150 width=15)
- default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"]
- <-Map 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_126]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_125] (rows=50 width=4)
- Output:["_col0"]
- Filter Operator [FIL_124] (rows=50 width=12)
- predicate:((d_year = 2000) and (d_moy = 9) and d_date_sk is not null)
- TableScan [TS_3] (rows=73049 width=12)
- default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"]
+ Select Operator [SEL_27] (rows=478292911 width=825)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"]
+ Merge Join Operator [MERGEJOIN_119] (rows=478292911 width=825)
+ Conds:RS_24._col8=RS_134._col0(Inner),Output:["_col0","_col5","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"]
+ <-Map 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_134]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_133] (rows=1704 width=821)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"]
+ Filter Operator [FIL_132] (rows=1704 width=821)
+ predicate:s_store_sk is not null
+ TableScan [TS_12] (rows=1704 width=821)
+ default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_24]
+ PartitionCols:_col8,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_118] (rows=478292911 width=11)
+ Conds:RS_21._col5=RS_131._col0(Inner),Output:["_col0","_col5","_col8"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_131]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_130] (rows=73049 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_129] (rows=73049 width=4)
+ predicate:d_date_sk is not null
+ TableScan [TS_9] (rows=73049 width=4)
+ default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_21]
+ PartitionCols:_col5,null sort order:a,sort order:+
+ Merge Join Operator [MERGEJOIN_117] (rows=478292911 width=11)
+ Conds:RS_18._col1, _col2, _col3=RS_128._col1, _col2, _col4(Inner),Output:["_col0","_col5","_col8"]
+ <-Map 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_128]
+ PartitionCols:_col1, _col2, _col4,null sort order:aaa,sort order:+++
+ Select Operator [SEL_127] (rows=501694138 width=19)
+ Output:["_col0","_col1","_col2","_col3","_col4"]
+ Filter Operator [FIL_126] (rows=501694138 width=19)
+ predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number is not null and ss_item_sk is not null)
+ TableScan [TS_6] (rows=575995635 width=19)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col1, _col2, _col3,null sort order:aaa,sort order:+++
+ Merge Join Operator [MERGEJOIN_116] (rows=53632139 width=15)
+ Conds:RS_122._col0=RS_125._col0(Inner),Output:["_col0","_col1","_col2","_col3"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_122]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_121] (rows=53632139 width=15)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_120] (rows=53632139 width=15)
+ predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null and sr_item_sk is not null)
+ TableScan [TS_0] (rows=57591150 width=15)
+ default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_125]
+ PartitionCols:_col0,null sort order:a,sort order:+
+ Select Operator [SEL_124] (rows=50 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_123] (rows=50 width=12)
+ predicate:((d_year = 2000) and (d_moy = 9) and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=12)
+ default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"]
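A reading aid for the order strings that recur in these plans, and in the keys the removed Top N Key Operators declared: each key contributes one character, with sort order + or - for ascending or descending, and null sort order a or z for nulls sorting first or last, so query50's TNK_56 above tracked a ten-key prefix (++++++++++ with zzzzzzzzzz). A two-key sketch of my understanding of the encoding (not taken from this patch):

    -- Expected to print sort order:+- and null sort order:za in EXPLAIN.
    select sr_item_sk, sr_ticket_number
    from store_returns
    order by sr_item_sk asc nulls last,
             sr_ticket_number desc nulls first
    limit 100;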
diff --git ql/src/test/results/clientpositive/perf/tez/query66.q.out ql/src/test/results/clientpositive/perf/tez/query66.q.out
index 23ffd637a3..82d58109ea 100644
--- ql/src/test/results/clientpositive/perf/tez/query66.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query66.q.out
@@ -479,179 +479,175 @@ Stage-0
 limit:-1
 Stage-1 Reducer 9 vectorized
- File Output Operator [FS_254]
- Select Operator [SEL_253] (rows=100 width=4614)
+ File Output Operator [FS_250]
+ Select Operator [SEL_249] (rows=100 width=4614)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43"]
- Limit [LIM_252] (rows=100 width=4510)
+ Limit [LIM_248] (rows=100 width=4510)
 Number of rows:100
- Select Operator [SEL_251] (rows=2423925 width=4510)
+ Select Operator [SEL_247] (rows=2423925 width=4510)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"]
 <-Reducer 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_250]
+ SHUFFLE [RS_246]
 null sort order:z,sort order:+
- Group By Operator [GBY_249] (rows=2423925 width=4510)
+ Group By Operator [GBY_245] (rows=2423925 width=4510)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)","sum(VALUE._col24)","sum(VALUE._col25)","sum(VALUE._col26)","sum(VALUE._col27)","sum(VALUE._col28)","sum(VALUE._col29)","sum(VALUE._col30)","sum(VALUE._col31)","sum(VALUE._col32)","sum(VALUE._col33)","sum(VALUE._col34)","sum(VALUE._col35)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5
 <-Union 7 [SIMPLE_EDGE]
 <-Reducer 15 [CONTAINS] vectorized
- Reduce Output Operator [RS_264]
+ Reduce Output Operator [RS_259]
 PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5,null sort order:zaaaaa,sort order:++++++
- Group By Operator [GBY_263] (rows=2513727 width=4510)
+ Group By Operator [GBY_258] (rows=2513727 width=4510)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5
- Top N Key Operator [TNK_262] (rows=2513727 width=3166)
- keys:_col0, _col1, _col2, _col3, _col4, _col5,null sort order:zaaaaa,sort order:++++++,top n:100
- Select Operator [SEL_261] (rows=2513727 width=3166)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"]
- Group By Operator [GBY_260] (rows=2513700 width=3166)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5
- <-Reducer 14 [SIMPLE_EDGE]
- SHUFFLE [RS_63]
- PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5,null sort order:aaaaaa,sort order:++++++
- Group By Operator [GBY_62] (rows=15681803 width=3166)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5
- Select Operator [SEL_60] (rows=15681803 width=750)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"]
- Merge Join Operator [MERGEJOIN_204] (rows=15681803 width=750)
- Conds:RS_57._col3=RS_243._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"]
- <-Map 20 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_243]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_241] (rows=27 width=482)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
- Filter Operator [FIL_240] (rows=27 width=482)
- predicate:w_warehouse_sk is not null
- TableScan [TS_12] (rows=27 width=482)
- default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name","w_warehouse_sq_ft","w_city","w_county","w_state","w_country"]
- <-Reducer 13 [SIMPLE_EDGE]
- SHUFFLE [RS_57]
- PartitionCols:_col3,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_203] (rows=15681803 width=275)
- Conds:RS_54._col2=RS_221._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"]
- <-Map 17 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_221]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_218] (rows=1 width=4)
- Output:["_col0"]
- Filter Operator [FIL_217] (rows=1 width=88)
- predicate:((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null)
- TableScan [TS_9] (rows=1 width=88)
- default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_carrier"]
- <-Reducer 12 [SIMPLE_EDGE]
- SHUFFLE [RS_54]
- PartitionCols:_col2,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_202] (rows=282272460 width=279)
- Conds:RS_51._col0=RS_239._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"]
- <-Map 16 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_239]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_237] (rows=652 width=52)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"]
- Filter Operator [FIL_236] (rows=652 width=12)
- predicate:((d_year = 2002) and d_date_sk is not null)
- TableScan [TS_6] (rows=73049 width=12)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"]
- <-Reducer 11 [SIMPLE_EDGE]
- SHUFFLE [RS_51]
- PartitionCols:_col0,null sort order:a,sort order:+
- Merge Join Operator [MERGEJOIN_201] (rows=282272460 width=235)
- Conds:RS_259._col1=RS_235._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"]
- <-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_235]
- PartitionCols:_col0,null sort order:a,sort order:+
- Select Operator [SEL_233] (rows=33426 width=4)
- Output:["_col0"]
- Filter Operator [FIL_232] (rows=33426 width=8)
- predicate:(t_time BETWEEN 49530 AND 78330 and t_time_sk is not null)
- TableScan [TS_3] (rows=86400 width=8)
- default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_time"]
- <-Map 21 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_259]
- PartitionCols:_col1,null sort order:a,sort order:+
- Select Operator [SEL_258] (rows=282272460 width=239)
-
Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_257] (rows=282272460 width=243) - predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null and cs_ship_mode_sk BETWEEN DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_bloom_filter))) - TableScan [TS_33] (rows=287989836 width=243) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] - Group By Operator [GBY_224] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=1 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_218] + Select Operator [SEL_257] (rows=2513727 width=3166) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Group By Operator [GBY_256] (rows=2513700 width=3166) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_63] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5,null sort order:aaaaaa,sort order:++++++ + Group By Operator [GBY_62] (rows=15681803 width=3166) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Select Operator [SEL_60] 
(rows=15681803 width=750) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] + Merge Join Operator [MERGEJOIN_203] (rows=15681803 width=750) + Conds:RS_57._col3=RS_240._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_240] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_238] (rows=27 width=482) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_237] (rows=27 width=482) + predicate:w_warehouse_sk is not null + TableScan [TS_12] (rows=27 width=482) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name","w_warehouse_sq_ft","w_city","w_county","w_state","w_country"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col3,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_202] (rows=15681803 width=275) + Conds:RS_54._col2=RS_218._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_218] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_215] (rows=1 width=4) + Output:["_col0"] + Filter Operator [FIL_214] (rows=1 width=88) + predicate:((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) + TableScan [TS_9] (rows=1 width=88) + default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_carrier"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col2,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_201] (rows=282272460 width=279) + Conds:RS_51._col0=RS_236._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_236] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_234] (rows=652 width=52) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Filter Operator [FIL_233] (rows=652 width=12) + predicate:((d_year = 2002) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col0,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_200] (rows=282272460 width=235) + Conds:RS_255._col1=RS_232._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_230] (rows=33426 width=4) + Output:["_col0"] + Filter Operator [FIL_229] (rows=33426 width=8) + predicate:(t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) + TableScan [TS_3] (rows=86400 width=8) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_time"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_255] + PartitionCols:_col1,null sort order:a,sort order:+ + Select Operator [SEL_254] 
(rows=282272460 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_253] (rows=282272460 width=243) + predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null and cs_ship_mode_sk BETWEEN DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_bloom_filter))) + TableScan [TS_33] (rows=287989836 width=243) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_252] + Group By Operator [GBY_251] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] + Group By Operator [GBY_221] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_219] (rows=1 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_215] <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_248] + Reduce Output Operator [RS_244] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5,null sort order:zaaaaa,sort order:++++++ - Group By Operator [GBY_247] (rows=2513727 width=4510) + Group By Operator [GBY_243] (rows=2513727 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Top N Key Operator [TNK_246] (rows=2513727 width=3166) - keys:_col0, _col1, _col2, _col3, _col4, _col5,null sort order:zaaaaa,sort order:++++++,top n:100 - Select Operator [SEL_245] (rows=2513727 width=3166) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Group By Operator [GBY_244] (rows=27 width=3166) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5,null sort order:aaaaaa,sort order:++++++ - Group By Operator [GBY_29] (rows=27 width=3166) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_27] (rows=7992175 width=750) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_200] (rows=7992175 width=750) - Conds:RS_24._col3=RS_242._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] - PartitionCols:_col0,null sort order:a,sort order:+ - Please refer to the previous Select Operator [SEL_241] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col3,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_199] (rows=7992175 width=275) - Conds:RS_21._col2=RS_219._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] - PartitionCols:_col0,null sort order:a,sort order:+ - Please refer to the previous Select Operator [SEL_218] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_198] (rows=143859154 width=279) - Conds:RS_18._col0=RS_238._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] - PartitionCols:_col0,null sort order:a,sort order:+ - Please refer to the previous Select Operator [SEL_237] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0,null 
sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_197] (rows=143859154 width=235) - Conds:RS_231._col1=RS_234._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] - PartitionCols:_col0,null sort order:a,sort order:+ - Please refer to the previous Select Operator [SEL_233] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] - PartitionCols:_col1,null sort order:a,sort order:+ - Select Operator [SEL_230] (rows=143859154 width=239) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_229] (rows=143859154 width=243) - predicate:(ws_sold_time_sk is not null and ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_ship_mode_sk is not null and ws_ship_mode_sk BETWEEN DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=243) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_228] - Group By Operator [GBY_227] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - Group By Operator [GBY_223] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_220] (rows=1 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_218] + Select Operator [SEL_242] (rows=2513727 width=3166) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Group By Operator [GBY_241] (rows=27 width=3166) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5,null sort order:aaaaaa,sort order:++++++ + Group By Operator [GBY_29] (rows=27 width=3166) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Select Operator [SEL_27] (rows=7992175 width=750) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] + Merge Join Operator [MERGEJOIN_199] (rows=7992175 width=750) + Conds:RS_24._col3=RS_239._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_239] + PartitionCols:_col0,null sort order:a,sort order:+ + Please refer to the previous Select Operator [SEL_238] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col3,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_198] (rows=7992175 width=275) + Conds:RS_21._col2=RS_216._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col0,null sort order:a,sort order:+ + Please refer to the previous Select Operator [SEL_215] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_197] (rows=143859154 width=279) + Conds:RS_18._col0=RS_235._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_235] + PartitionCols:_col0,null sort order:a,sort order:+ + Please refer to the previous Select Operator [SEL_234] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_196] (rows=143859154 width=235) + Conds:RS_228._col1=RS_231._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col0,null sort order:a,sort order:+ + Please refer to the previous Select Operator [SEL_230] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + PartitionCols:_col1,null sort order:a,sort order:+ + Select Operator [SEL_227] (rows=143859154 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_226] (rows=143859154 width=243) + predicate:(ws_sold_time_sk is not null and ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_ship_mode_sk is not null and ws_ship_mode_sk BETWEEN DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_bloom_filter))) + TableScan [TS_0] 
(rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_225] + Group By Operator [GBY_224] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_222] + Group By Operator [GBY_220] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_217] (rows=1 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_215] diff --git ql/src/test/results/clientpositive/perf/tez/query69.q.out ql/src/test/results/clientpositive/perf/tez/query69.q.out index d27f4060f6..ab68299022 100644 --- ql/src/test/results/clientpositive/perf/tez/query69.q.out +++ ql/src/test/results/clientpositive/perf/tez/query69.q.out @@ -133,194 +133,192 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_231] - Limit [LIM_230] (rows=1 width=383) + File Output Operator [FS_230] + Limit [LIM_229] (rows=1 width=383) Number of rows:100 - Select Operator [SEL_229] (rows=1 width=383) + Select Operator [SEL_228] (rows=1 width=383) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] + SHUFFLE [RS_227] null sort order:zzzzz,sort order:+++++ - Select Operator [SEL_227] (rows=1 width=383) + Select Operator [SEL_226] (rows=1 width=383) Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Group By Operator [GBY_226] (rows=1 width=367) + Group By Operator [GBY_225] (rows=1 width=367) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0, _col1, _col2, _col3, _col4,null sort order:zzzzz,sort order:+++++ Group By Operator [GBY_68] (rows=1 width=367) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10 - Top N Key Operator [TNK_105] (rows=1 width=363) - keys:_col6, _col7, _col8, _col9, _col10,null sort order:zzzzz,sort order:+++++,top n:100 - Select Operator [SEL_67] (rows=1 width=363) - Output:["_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_66] (rows=1 width=363) - predicate:_col13 is null - Merge Join Operator [MERGEJOIN_184] (rows=1401496 width=363) - Conds:RS_63._col0=RS_225._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col13"] - <-Reducer 5 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_63] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_48] (rows=1 width=367) - Output:["_col0","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_47] (rows=1 width=367) - predicate:_col11 is null - Merge Join Operator [MERGEJOIN_183] (rows=1414922 width=367) - Conds:RS_44._col0=RS_217._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] - PartitionCols:_col1,null sort order:a,sort order:+ - Select Operator [SEL_216] (rows=1414922 width=7) - Output:["_col0","_col1"] - Group By Operator [GBY_215] (rows=1414922 width=3) - Output:["_col0"],keys:KEY._col0 - <-Reducer 
16 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0,null sort order:a,sort order:+ - Group By Operator [GBY_29] (rows=143930993 width=3) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_180] (rows=143930993 width=3) - Conds:RS_214._col0=RS_198._col0(Inner),Output:["_col1"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_195] (rows=150 width=4) - Output:["_col0"] - Filter Operator [FIL_194] (rows=150 width=12) - predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_214] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_213] (rows=143930993 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_212] (rows=143930993 width=7) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_19] (rows=144002668 width=7) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_211] - Group By Operator [GBY_210] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_204] - Group By Operator [GBY_202] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_199] (rows=150 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_195] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col0,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_182] (rows=525327388 width=363) - Conds:RS_41._col0=RS_42._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col0,null sort order:a,sort order:+ - Group By Operator [GBY_40] (rows=525327388 width=3) - Output:["_col0"],keys:_col0 - Select Operator [SEL_18] (rows=525327388 width=3) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_179] (rows=525327388 width=3) - Conds:RS_209._col0=RS_196._col0(Inner),Output:["_col1"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_196] - PartitionCols:_col0,null sort order:a,sort order:+ - Please refer to the previous Select Operator [SEL_195] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_208] (rows=525327388 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_207] (rows=525327388 width=7) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_9] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST 
[RS_206] - Group By Operator [GBY_205] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] - Group By Operator [GBY_201] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_197] (rows=150 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_195] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_41] + Select Operator [SEL_67] (rows=1 width=363) + Output:["_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_66] (rows=1 width=363) + predicate:_col13 is null + Merge Join Operator [MERGEJOIN_183] (rows=1401496 width=363) + Conds:RS_63._col0=RS_224._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col13"] + <-Reducer 5 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_63] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_48] (rows=1 width=367) + Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_47] (rows=1 width=367) + predicate:_col11 is null + Merge Join Operator [MERGEJOIN_182] (rows=1414922 width=367) + Conds:RS_44._col0=RS_216._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col1,null sort order:a,sort order:+ + Select Operator [SEL_215] (rows=1414922 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_214] (rows=1414922 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_178] (rows=4605476 width=363) - Conds:RS_36._col1=RS_193._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_192] (rows=1861800 width=363) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_191] (rows=1861800 width=363) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=363) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col1,null sort order:a,sort order:+ - Merge Join Operator [MERGEJOIN_177] (rows=4541258 width=5) - Conds:RS_187._col2=RS_190._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] - PartitionCols:_col2,null sort order:a,sort order:+ - Select Operator [SEL_186] (rows=77201384 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_185] (rows=77201384 width=11) - predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=11) - default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_190] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_189] (rows=2352941 width=90) - Output:["_col0"] - Filter Operator [FIL_188] (rows=2352941 width=90) - predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) - TableScan 
[TS_3] (rows=40000000 width=90) - default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - PartitionCols:_col1,null sort order:a,sort order:+ - Select Operator [SEL_224] (rows=1401496 width=7) - Output:["_col0","_col1"] - Group By Operator [GBY_223] (rows=1401496 width=3) - Output:["_col0"],keys:KEY._col0 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_60] + Group By Operator [GBY_29] (rows=143930993 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_179] (rows=143930993 width=3) + Conds:RS_213._col0=RS_197._col0(Inner),Output:["_col1"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_197] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_194] (rows=150 width=4) + Output:["_col0"] + Filter Operator [FIL_193] (rows=150 width=12) + predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_213] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_212] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_211] (rows=143930993 width=7) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_19] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_210] + Group By Operator [GBY_209] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] + Group By Operator [GBY_201] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_198] (rows=150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_194] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_44] PartitionCols:_col0,null sort order:a,sort order:+ - Group By Operator [GBY_59] (rows=285115246 width=3) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_181] (rows=285115246 width=3) - Conds:RS_222._col0=RS_200._col0(Inner),Output:["_col1"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] - PartitionCols:_col0,null sort order:a,sort order:+ - Please refer to the previous Select Operator [SEL_195] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_222] - PartitionCols:_col0,null sort order:a,sort order:+ - Select Operator [SEL_221] (rows=285115246 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_220] (rows=285115246 width=7) - predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_ship_customer_sk BETWEEN DynamicValue(RS_63_c_c_customer_sk_min) AND DynamicValue(RS_63_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_63_c_c_customer_sk_bloom_filter))) - TableScan [TS_49] (rows=287989836 width=7) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] - <-Reducer 9 
[BROADCAST_EDGE] vectorized - BROADCAST [RS_219] - Group By Operator [GBY_218] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_168] - Group By Operator [GBY_167] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_166] (rows=1 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_48] + Merge Join Operator [MERGEJOIN_181] (rows=525327388 width=363) + Conds:RS_41._col0=RS_42._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0,null sort order:a,sort order:+ + Group By Operator [GBY_40] (rows=525327388 width=3) + Output:["_col0"],keys:_col0 + Select Operator [SEL_18] (rows=525327388 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_178] (rows=525327388 width=3) + Conds:RS_208._col0=RS_195._col0(Inner),Output:["_col1"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] + PartitionCols:_col0,null sort order:a,sort order:+ + Please refer to the previous Select Operator [SEL_194] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_208] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_207] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_206] (rows=525327388 width=7) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_9] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_205] + Group By Operator [GBY_204] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_202] + Group By Operator [GBY_200] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_196] (rows=150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_194] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_177] (rows=4605476 width=363) + Conds:RS_36._col1=RS_192._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_192] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_191] (rows=1861800 width=363) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_190] (rows=1861800 width=363) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=363) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col1,null sort order:a,sort order:+ + Merge Join Operator [MERGEJOIN_176] (rows=4541258 width=5) 
+ Conds:RS_186._col2=RS_189._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] + PartitionCols:_col2,null sort order:a,sort order:+ + Select Operator [SEL_185] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_184] (rows=77201384 width=11) + predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_188] (rows=2352941 width=90) + Output:["_col0"] + Filter Operator [FIL_187] (rows=2352941 width=90) + predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=90) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] + PartitionCols:_col1,null sort order:a,sort order:+ + Select Operator [SEL_223] (rows=1401496 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_222] (rows=1401496 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col0,null sort order:a,sort order:+ + Group By Operator [GBY_59] (rows=285115246 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_180] (rows=285115246 width=3) + Conds:RS_221._col0=RS_199._col0(Inner),Output:["_col1"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_199] + PartitionCols:_col0,null sort order:a,sort order:+ + Please refer to the previous Select Operator [SEL_194] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_221] + PartitionCols:_col0,null sort order:a,sort order:+ + Select Operator [SEL_220] (rows=285115246 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_219] (rows=285115246 width=7) + predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_ship_customer_sk BETWEEN DynamicValue(RS_63_c_c_customer_sk_min) AND DynamicValue(RS_63_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_63_c_c_customer_sk_bloom_filter))) + TableScan [TS_49] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_218] + Group By Operator [GBY_217] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_167] + Group By Operator [GBY_166] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_165] (rows=1 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_48] diff --git ql/src/test/results/clientpositive/tez/topnkey.q.out ql/src/test/results/clientpositive/tez/topnkey.q.out index 45947d0be8..e4f65211e9 100644 --- ql/src/test/results/clientpositive/tez/topnkey.q.out +++ ql/src/test/results/clientpositive/tez/topnkey.q.out @@ -167,11 +167,31 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false -Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3
+ File Output Operator [FS_10] + Limit [LIM_9] (rows=5 width=95) + Number of rows:5 + Select Operator [SEL_8] (rows=250 width=95) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_7] + Group By Operator [GBY_5] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Group By Operator [GBY_3] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_11] (rows=500 width=178) + keys:key,sort order:+,top n:5 + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY @@ -222,10 +242,10 @@ Stage-0 PartitionCols:_col0,null sort order:z,sort order:+ Group By Operator [GBY_2] (rows=250 width=87) Output:["_col0"],keys:key -Top N Key Operator [TNK_10] (rows=500 width=87) - keys:key,null sort order:z,sort order:+,top n:5 -Select Operator [SEL_1] (rows=500 width=87) - Output:["key"] + Select Operator [SEL_1] (rows=500 width=87) + Output:["key"] + Top N Key Operator [TNK_10] (rows=500 width=87) + keys:key,sort order:+,top n:5 TableScan [TS_0] (rows=500 width=87) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] @@ -242,13 +262,13 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 100 103 104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### @@ -352,16 +372,581 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### 
HDFS PATH ### +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_15] + Limit [LIM_14] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_13] (rows=395 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_12] + Group By Operator [GBY_10] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_18] (rows=791 width=178) + keys:_col0, _col2,sort order:++,top n:5 + Merge Join Operator [MERGEJOIN_24] (rows=791 width=178) + Conds:RS_4._col0=RS_5._col0(Left Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=87) + Output:["_col0"] + Top N Key Operator [TNK_25] + keys:key,sort order:+,top n:5 + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_5] + PartitionCols:_col0 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_15] + Limit [LIM_14] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_13] (rows=395 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_12] + Group By Operator [GBY_10] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Merge Join Operator [MERGEJOIN_24] (rows=791 width=178) + Conds:RS_4._col0=RS_5._col0(Right Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_5] + PartitionCols:_col0 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_25] + keys:key, value,sort order:++,top n:5 + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_15] + Limit [LIM_14] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_13] (rows=500 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_12] + Group By Operator [GBY_10] (rows=500 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=500 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_16] (rows=1000 width=178) + keys:_col0, _col2,sort order:++,top n:5 + Merge Join Operator [MERGEJOIN_17] (rows=1000 width=178) + Conds:RS_4._col0=RS_5._col0(Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_5] + PartitionCols:_col0 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_15] + Limit [LIM_14] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_13] (rows=500 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_12] + Group By Operator [GBY_10] (rows=500 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=500 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Merge Join Operator [MERGEJOIN_16] (rows=1000 width=178) + Conds:RS_4._col0=RS_5._col0(Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_5] + PartitionCols:_col0 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_test +POSTHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_test +PREHOOK: query: INSERT INTO t_test VALUES +(NULL, NULL, NULL), +(5, 2, 3), +(NULL, NULL, NULL), +(NULL, NULL, NULL), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2), +(NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_test +POSTHOOK: query: INSERT INTO t_test VALUES +(NULL, NULL, NULL), +(5, 2, 3), +(NULL, NULL, NULL), +(NULL, NULL, NULL), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2), +(NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_test +POSTHOOK: Lineage: t_test.a SCRIPT [] +POSTHOOK: Lineage: t_test.b SCRIPT [] +POSTHOOK: Lineage: t_test.c SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO.
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:3 + Stage-1 + Reducer 3 + File Output Operator [FS_9] + Limit [LIM_8] (rows=3 width=5) + Number of rows:3 + Select Operator [SEL_7] (rows=6 width=4) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_6] + Group By Operator [GBY_4] (rows=6 width=4) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_3] + PartitionCols:_col0, _col1 + Group By Operator [GBY_2] (rows=6 width=4) + Output:["_col0","_col1"],keys:a, b + Top N Key Operator [TNK_10] (rows=12 width=6) + keys:a, b,sort order:++,top n:3 + Select Operator [SEL_1] (rows=12 width=6) + Output:["a","b"] + TableScan [TS_0] (rows=12 width=6) + default@t_test,t_test,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"] + +PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 1 +5 2 +6 2 +PREHOOK: query: EXPLAIN +SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:2 + Stage-1 + Reducer 3 + File Output Operator [FS_9] + Limit [LIM_8] (rows=2 width=10) + Number of rows:2 + Select Operator [SEL_7] (rows=4 width=10) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_6] + Group By Operator [GBY_4] (rows=4 width=10) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_3] + PartitionCols:_col0 + Group By Operator [GBY_2] (rows=4 width=10) + Output:["_col0","_col1"],aggregations:["count(b)"],keys:a + Top N Key Operator [TNK_10] (rows=12 width=6) + keys:a,sort order:+,top n:2 + Select Operator [SEL_1] (rows=12 width=6) + Output:["a","b"] + TableScan [TS_0] (rows=12 width=6) + default@t_test,t_test,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"] + +PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 4 +6 1 +PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +NULL 0 +5 4 +PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
LAST LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 4 +6 1 +PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 4 +6 1 +PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +NULL 0 +5 4 +PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 4 +6 1 +PREHOOK: query: DROP TABLE IF EXISTS t_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_test +PREHOOK: Output: default@t_test +POSTHOOK: query: DROP TABLE IF EXISTS t_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_test +POSTHOOK: Output: default@t_test +PREHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_test +POSTHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_test +PREHOOK: query: INSERT INTO t_test VALUES +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(NULL, NULL, NULL), +(5, 2, 3), +(NULL, NULL, NULL), +(NULL, NULL, NULL), +(6, 2, 1), +(5, 1, 2), (5, 1, 2), (5, 1, 2), +(NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_test +POSTHOOK: query: INSERT INTO t_test VALUES +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(NULL, NULL, NULL), +(5, 2, 3), +(NULL, NULL, NULL), +(NULL, NULL, NULL), +(6, 2, 1), +(5, 1, 2), (5, 1, 2), (5, 1, 2), +(NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_test +POSTHOOK: Lineage: t_test.a SCRIPT [] +POSTHOOK: Lineage: t_test.b SCRIPT [] +POSTHOOK: Lineage: t_test.c SCRIPT [] +PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +7 3 +6 1 +PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +NULL 0 +7 3 +PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: 
Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +7 3 +6 1 +PREHOOK: query: DROP TABLE IF EXISTS t_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_test +PREHOOK: Output: default@t_test +POSTHOOK: query: DROP TABLE IF EXISTS t_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_test +POSTHOOK: Output: default@t_test diff --git ql/src/test/results/clientpositive/tez/vector_topnkey.q.out ql/src/test/results/clientpositive/tez/vector_topnkey.q.out index d179013e28..217f64ebe9 100644 --- ql/src/test/results/clientpositive/tez/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/tez/vector_topnkey.q.out @@ -1,3 +1,4 @@ PREHOOK: query: CREATE TABLE t_test( cint1 int, cint2 int, @@ -43,6 +44,15 @@ POSTHOOK: query: INSERT INTO t_test VALUES (7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5), (4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0), (NULL, NULL, NULL, NULL, NULL, NULL) +PREHOOK: query: explain vectorization +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain vectorization +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@t_test @@ -60,7 +70,26 @@ PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@t_test +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: explain vectorization +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain vectorization +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### PLAN VECTORIZATION: enabled: true @@ -240,17 +269,33 @@ POSTHOOK: query: SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@t_test POSTHOOK: Output: hdfs://### HDFS PATH ### 4 6 7 PREHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1, cint2 LIMIT 3 +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@t_test PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1, cint2 LIMIT 3 +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key
= src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@t_test POSTHOOK: Output: hdfs://### HDFS PATH ### 4 1 4 2 6 2 @@ -288,13 +333,210 @@ five 2.0 four 4.5 one 2.0 PREHOOK: query: SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@t_test PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@t_test POSTHOOK: Output: hdfs://### HDFS PATH ### 1.80 1.80000 2.00 2.00000 3.30 3.30000 @@ -306,3 +548,282 @@ POSTHOOK: query: DROP TABLE t_test POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@t_test POSTHOOK: Output: default@t_test +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_35] + Limit [LIM_34] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_33] (rows=395 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_32] + Group By Operator [GBY_31] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_18] (rows=791 width=178) + keys:_col0, _col2,sort order:++,top n:5 + Merge Join Operator [MERGEJOIN_24] (rows=791 width=178) + Conds:RS_28._col0=RS_30._col0(Left Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_28] + PartitionCols:_col0 + Select Operator [SEL_27] (rows=500 width=87) + Output:["_col0"] + Top N Key Operator [TNK_26] + keys:key,sort order:+,top n:5 + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_29] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER
JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_35] + Limit [LIM_34] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_33] (rows=395 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_32] + Group By Operator [GBY_31] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Merge Join Operator [MERGEJOIN_24] (rows=791 width=178) + Conds:RS_27._col0=RS_30._col0(Right Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_29] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_28] + keys:key, value,sort order:++,top n:5 + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_26] + Limit [LIM_25] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_24] (rows=500 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_23] + Group By Operator [GBY_22] (rows=500 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=500 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_16] (rows=1000 width=178) + keys:_col0, _col2,sort order:++,top n:5 + Merge Join Operator [MERGEJOIN_17] (rows=1000 width=178) + Conds:RS_19._col0=RS_21._col0(Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_18] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_25] + Limit [LIM_24] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_23] (rows=500 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_22] + Group By Operator [GBY_21] (rows=500 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=500 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Merge Join Operator [MERGEJOIN_16] (rows=1000 width=178) + Conds:RS_18._col0=RS_20._col0(Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_20] + PartitionCols:_col0 + Select Operator [SEL_19] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 diff --git ql/src/test/results/clientpositive/topnkey.q.out ql/src/test/results/clientpositive/topnkey.q.out index cecbe89b1c..6d4a822588 100644 --- ql/src/test/results/clientpositive/topnkey.q.out +++ ql/src/test/results/clientpositive/topnkey.q.out @@ -1,18 +1,13 @@ -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: EXPLAIN SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: EXPLAIN SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -OPTIMIZED SQL: SELECT `key` AS `$f0`, SUM(CAST(SUBSTR(`value`, 5) AS INTEGER)) AS `$f1` -FROM `default`.`src` -GROUP BY `key` -ORDER BY `key` -LIMIT 5 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -25,7 +20,6 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) outputColumnNames: _col0, _col1 @@ -43,65 +37,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked
pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [$hdt$_0:src] - Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -111,66 +48,22 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - GatherStats: false Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) @@ -181,26 +74,11 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -208,6 +86,19 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -222,50 +113,95 @@ POSTHOOK: Input: default@src 103 206 104 208 PREHOOK: query: EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: src + alias: src1 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output 
Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) - null sort order: z + null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -273,7 +209,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -281,19 +217,20 @@ STAGE PLANS: key expressions: _col0 (type: string) null sort order: z sort order: + - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + 
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -305,37 +242,47 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 -10 -100 -103 -104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: EXPLAIN +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -343,21 +290,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: 
boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src2 filterExpr: key is not null (type: boolean) @@ -379,16 +322,18 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -397,21 +342,45 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string) - null sort order: z + null sort order: a sort order: + - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE @@ -429,16 +398,253 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key 
= src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 -0 val_0 -0 val_0 -0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_test +POSTHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_test +PREHOOK: query: INSERT INTO t_test VALUES +(5, 2, 3), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_test +POSTHOOK: query: INSERT INTO t_test VALUES +(5, 2, 3), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_test +POSTHOOK: Lineage: t_test.a SCRIPT [] +POSTHOOK: Lineage: t_test.b SCRIPT [] +POSTHOOK: Lineage: t_test.c SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_test + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of 
rows: 3 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 1 +5 1 +PREHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 1 +5 1 +PREHOOK: query: EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_test + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: a, b + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: a (type: int), b (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Limit 
+ Number of rows: 3 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 2 +6 2 +PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 2 +6 2 +PREHOOK: query: DROP TABLE t_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_test +PREHOOK: Output: default@t_test +POSTHOOK: query: DROP TABLE t_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_test +POSTHOOK: Output: default@t_test
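
The "Top N Key Operator" entries in the plans above (for example "keys:a, b,sort order:++,top n:3") drop rows whose keys can no longer appear in the final top n before those rows reach the shuffle and the downstream Group By. Below is a minimal sketch of that filtering idea only, assuming a bounded set of the n smallest distinct keys and a comparator that encodes the plan's sort order and null order (the NULLS FIRST/LAST cases tested above); the class TopNKeyFilter and its methods are hypothetical names invented for this illustration, not Hive's actual operator.

import java.util.Comparator;
import java.util.TreeSet;

// Hypothetical sketch of the top-n-key filtering idea shown in the plans
// above; not Hive's actual TopNKey operator implementation.
final class TopNKeyFilter<K> {

    private final int topN;            // the "top n:N" value from the plan
    private final TreeSet<K> smallest; // the N smallest distinct keys seen so far

    TopNKeyFilter(int topN, Comparator<K> keyOrder) {
        // keyOrder is assumed to encode the plan's sort order and null order,
        // e.g. ascending with nulls first for "sort order:+" plus NULLS FIRST.
        this.topN = topN;
        this.smallest = new TreeSet<>(keyOrder);
    }

    // Returns true when a row with this key can still reach the final top N,
    // so the row is forwarded; false means the row can be dropped early.
    boolean canForward(K key) {
        if (smallest.contains(key)) {
            return true; // duplicate of a key already inside the boundary
        }
        smallest.add(key);
        if (smallest.size() > topN) {
            // Evict the largest tracked key. The set held no key equal to the
            // new one, so a reference check tells us whether the new key
            // itself fell outside the boundary.
            return smallest.pollLast() != key;
        }
        return true; // fewer than N distinct keys observed so far
    }
}

With top n = 3 and an ascending two-column comparator, a filter like this forwards the first three distinct (a, b) keys and rejects larger ones as soon as the boundary fills, which matches the row reduction the plans above show when the operator sits directly below a map-side Group By or a join input rather than at the final Limit.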