diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java index b7c1250220..0ccaeea1da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java @@ -28,6 +28,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import java.io.Serializable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import static org.apache.hadoop.hive.ql.plan.api.OperatorType.TOPNKEY; @@ -38,10 +41,13 @@ private static final long serialVersionUID = 1L; - private transient TopNKeyFilter topNKeyFilter; + private transient Map> topNKeyFilters; + private transient KeyWrapper partitionKeyWrapper; private transient KeyWrapper keyWrapper; + private transient KeyWrapperComparator keyWrapperComparator; + /** Kryo ctor. */ public TopNKeyOperator() { super(); @@ -55,38 +61,62 @@ public TopNKeyOperator(CompilationOpContext ctx) { protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); - String columnSortOrder = conf.getColumnSortOrder(); - String nullSortOrder = conf.getNullOrder(); - ObjectInspector rowInspector = inputObjInspectors[0]; outputObjInspector = rowInspector; + int numPartitionKeys = conf.getPartitionKeyColumns().size(); + List keyColumns = conf.getKeyColumns().subList(numPartitionKeys, conf.getKeyColumns().size()); + String columnSortOrder = conf.getColumnSortOrder().substring(numPartitionKeys); + String nullSortOrder = conf.getNullOrder().substring(numPartitionKeys); + // init keyFields - int numKeys = conf.getKeyColumns().size(); - ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[numKeys]; - ObjectInspector[] keyObjectInspectors = new ObjectInspector[numKeys]; - ObjectInspector[] currentKeyObjectInspectors = new ObjectInspector[numKeys]; + ObjectInspector[] keyObjectInspectors = new ObjectInspector[keyColumns.size()]; + ObjectInspector[] currentKeyObjectInspectors = new ObjectInspector[keyColumns.size()]; + keyWrapper = initObjectInspectors(hconf, keyColumns, rowInspector, keyObjectInspectors, currentKeyObjectInspectors); + ObjectInspector[] partitionKeyObjectInspectors = new ObjectInspector[numPartitionKeys]; + ObjectInspector[] partitionCurrentKeyObjectInspectors = new ObjectInspector[numPartitionKeys]; + partitionKeyWrapper = initObjectInspectors(hconf, conf.getPartitionKeyColumns(), rowInspector, + partitionKeyObjectInspectors, partitionCurrentKeyObjectInspectors); + + keyWrapperComparator = new KeyWrapperComparator( + keyObjectInspectors, currentKeyObjectInspectors, columnSortOrder.toString(), nullSortOrder.toString()); + + this.topNKeyFilters = new HashMap<>(); + } - for (int i = 0; i < numKeys; i++) { - ExprNodeDesc key = conf.getKeyColumns().get(i); + private KeyWrapper initObjectInspectors(Configuration hconf, + List keyColumns, + ObjectInspector rowInspector, + ObjectInspector[] keyObjectInspectors, + ObjectInspector[] currentKeyObjectInspectors) throws HiveException { + ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[keyColumns.size()]; + for (int i = 0; i < keyColumns.size(); i++) { + ExprNodeDesc key = keyColumns.get(i); keyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf); keyObjectInspectors[i] = keyFields[i].initialize(rowInspector); currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(keyObjectInspectors[i], - ObjectInspectorUtils.ObjectInspectorCopyOption.WRITABLE); + ObjectInspectorUtils.ObjectInspectorCopyOption.WRITABLE); } - this.topNKeyFilter = new TopNKeyFilter<>(conf.getTopN(), new KeyWrapperComparator( - keyObjectInspectors, currentKeyObjectInspectors, columnSortOrder, nullSortOrder)); - KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors); - keyWrapper = keyWrapperFactory.getKeyWrapper(); + return keyWrapperFactory.getKeyWrapper(); } @Override public void process(Object row, int tag) throws HiveException { + partitionKeyWrapper.getNewKey(row, inputObjInspectors[tag]); + partitionKeyWrapper.setHashKey(); + + TopNKeyFilter topNKeyFilter = topNKeyFilters.get(partitionKeyWrapper); + if (topNKeyFilter == null) { + topNKeyFilter = new TopNKeyFilter<>(conf.getTopN(), keyWrapperComparator); + topNKeyFilters.put(partitionKeyWrapper.copyKey(), topNKeyFilter); + } + keyWrapper.getNewKey(row, inputObjInspectors[tag]); keyWrapper.setHashKey(); + if (topNKeyFilter.canForward(keyWrapper)) { forward(row, outputObjInspector); } @@ -94,7 +124,7 @@ public void process(Object row, int tag) throws HiveException { @Override protected final void closeOp(boolean abort) throws HiveException { - topNKeyFilter.clear(); + topNKeyFilters.clear(); super.closeOp(abort); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java index ce6efa4919..a9ff6b4a83 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java @@ -26,12 +26,14 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.TopNKeyDesc; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.Collections; import java.util.List; import java.util.Stack; @@ -58,20 +60,25 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } - // Currently, per partitioning top n key is not supported - // in TopNKey operator - if (reduceSinkDesc.isPTFReduceSink()) { - return null; - } - // Check whether there already is a top n key operator Operator parentOperator = reduceSinkOperator.getParentOperators().get(0); if (parentOperator instanceof TopNKeyOperator) { return null; } + List partitionCols = Collections.emptyList(); + if (reduceSinkDesc.isPTFReduceSink()) { + // All keys are partition keys or no keys at all + // Note: partition cols are prefix of key cols + if (reduceSinkDesc.getPartitionCols().size() >= reduceSinkDesc.getKeyCols().size()) { + return null; + } + + partitionCols = reduceSinkDesc.getPartitionCols(); + } + TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(), - reduceSinkDesc.getNullOrder(), reduceSinkDesc.getKeyCols()); + reduceSinkDesc.getNullOrder(), reduceSinkDesc.getKeyCols(), partitionCols); copyDown(reduceSinkOperator, topNKeyDesc); return null; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java index 348fbb5faf..d2ae723574 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java @@ -154,13 +154,12 @@ private void pushdownThroughGroupBy(TopNKeyOperator topNKey) throws SemanticExce final TopNKeyDesc topNKeyDesc = topNKey.getConf(); CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc, groupByDesc); - if (commonKeyPrefix.isEmpty()) { + if (commonKeyPrefix.isEmpty() || commonKeyPrefix.size() == topNKeyDesc.getPartitionKeyColumns().size()) { return; } LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), groupBy.getName()); - final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), commonKeyPrefix.getMappedOrder(), - commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns()); + final TopNKeyDesc newTopNKeyDesc = topNKeyDesc.combine(commonKeyPrefix); pushdown(copyDown(groupBy, newTopNKeyDesc)); if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { @@ -184,13 +183,12 @@ private void pushdownThroughReduceSink(TopNKeyOperator topNKey) throws SemanticE final TopNKeyDesc topNKeyDesc = topNKey.getConf(); CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc, reduceSinkDesc); - if (commonKeyPrefix.isEmpty()) { + if (commonKeyPrefix.isEmpty() || commonKeyPrefix.size() == topNKeyDesc.getPartitionKeyColumns().size()) { return; } LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), reduceSink.getName()); - final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), - commonKeyPrefix.getMappedOrder(), commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns()); + final TopNKeyDesc newTopNKeyDesc = topNKeyDesc.combine(commonKeyPrefix); pushdown(copyDown(reduceSink, newTopNKeyDesc)); if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { @@ -242,14 +240,13 @@ private void pushdownThroughLeftOuterJoin(TopNKeyOperator topNKey) throws Semant reduceSinkDesc.getColumnExprMap(), reduceSinkDesc.getOrder(), reduceSinkDesc.getNullOrder()); - if (commonKeyPrefix.isEmpty()) { + if (commonKeyPrefix.isEmpty() || commonKeyPrefix.size() == topNKeyDesc.getPartitionKeyColumns().size()) { return; } LOG.debug("Pushing a copy of {} through {} and {}", topNKey.getName(), join.getName(), reduceSinkOperator.getName()); - final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), - commonKeyPrefix.getMappedOrder(), commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns()); + final TopNKeyDesc newTopNKeyDesc = topNKeyDesc.combine(commonKeyPrefix); pushdown(copyDown(reduceSinkOperator, newTopNKeyDesc)); if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java index f752967a96..b5ba5ff20b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java @@ -17,11 +17,13 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.optimizer.topnkey.CommonKeyPrefix; import org.apache.hadoop.hive.ql.plan.Explain.Level; import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.stream.Collectors; /** * TopNKeyDesc. @@ -29,12 +31,14 @@ */ @Explain(displayName = "Top N Key Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class TopNKeyDesc extends AbstractOperatorDesc { + private static final long serialVersionUID = 1L; private int topN; private String columnSortOrder; private String nullOrder; private List keyColumns; + private List partitionKeyColumns; public TopNKeyDesc() { } @@ -43,13 +47,14 @@ public TopNKeyDesc( final int topN, final String columnSortOrder, final String nullOrder, - final List keyColumns) { + final List keyColumns, + final List partitionKeyColumns) { this.topN = topN; - this.keyColumns = new ArrayList<>(keyColumns.size()); StringBuilder sortOrder = new StringBuilder(columnSortOrder.length()); StringBuilder nullSortOrder = new StringBuilder(nullOrder.length()); + this.partitionKeyColumns = new ArrayList<>(partitionKeyColumns.size()); for (int i = 0; i < keyColumns.size(); ++i) { ExprNodeDesc keyExpression = keyColumns.get(i); @@ -63,6 +68,13 @@ public TopNKeyDesc( this.columnSortOrder = sortOrder.toString(); this.nullOrder = nullSortOrder.toString(); + + for (ExprNodeDesc keyExpression : partitionKeyColumns) { + if (keyExpression instanceof ExprNodeConstantDesc) { + continue; + } + this.partitionKeyColumns.add(keyExpression); + } } @Explain(displayName = "top n", explainLevels = { Level.DEFAULT, Level.EXTENDED, Level.USER }) @@ -118,6 +130,25 @@ public void setKeyColumns(List keyColumns) { return ret; } + public List getPartitionKeyColumns() { + return partitionKeyColumns; + } + + public void setPartitionKeyColumns(List partitionKeyColumns) { + this.partitionKeyColumns = partitionKeyColumns; + } + + @Explain(displayName = "Map-reduce partition columns") + public String getPartitionKeyString() { + return PlanUtils.getExprListString(partitionKeyColumns); + } + + @Explain(displayName = "PartitionCols", explainLevels = { Level.USER }) + public String getUserLevelExplainPartitionKeyString() { + return PlanUtils.getExprListString(partitionKeyColumns, true); + } + + @Override public boolean isSame(OperatorDesc other) { if (getClass().getName().equals(other.getClass().getName())) { @@ -164,4 +195,11 @@ public TopNKeyDescExplainVectorization getTopNKeyVectorization() { } return new TopNKeyDescExplainVectorization(this, vectorTopNKeyDesc); } + + public TopNKeyDesc combine(CommonKeyPrefix commonKeyPrefix) { + return new TopNKeyDesc(topN, commonKeyPrefix.getMappedOrder(), + commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns(), + commonKeyPrefix.getMappedColumns().subList(0, partitionKeyColumns.size())); + } + } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java new file mode 100644 index 0000000000..90c0175544 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec; + +import static org.hamcrest.Matchers.is; +import static org.junit.Assert.assertThat; + +import java.util.Comparator; +import java.util.Objects; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.junit.Test; + +/** + * Unit test of TopNKeyFilter. + */ +public class TestTopNKeyFilter { + + public static final Comparator TEST_KEY_WRAPPER_COMPARATOR = Comparator.comparingInt(o -> o.keyValue); + + @Test + public void testNothingCanBeForwardedIfTopNIs0() { + TopNKeyFilter topNKeyFilter = new TopNKeyFilter<>(0, TEST_KEY_WRAPPER_COMPARATOR); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(false)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(-1)), is(false)); + } + + @Test + public void testFirstTopNKeysCanBeForwarded() { + TopNKeyFilter topNKeyFilter = new TopNKeyFilter<>(3, TEST_KEY_WRAPPER_COMPARATOR); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(false)); + } + + @Test + public void testKeyCanNotBeForwardedIfItIsDroppedOutFromTopNKeys() { + TopNKeyFilter topNKeyFilter = new TopNKeyFilter<>(2, TEST_KEY_WRAPPER_COMPARATOR); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(2)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(false)); + } + + @Test + public void testMembersOfTopNKeysStillCanBeForwardedAfterNonTopNKeysTried() { + TopNKeyFilter topNKeyFilter = new TopNKeyFilter<>(2, TEST_KEY_WRAPPER_COMPARATOR); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(false)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true)); + } + + /** + * Test implementation of KeyWrapper. + */ + private static class TestKeyWrapper extends KeyWrapper { + + private final int keyValue; + + public TestKeyWrapper(int keyValue) { + this.keyValue = keyValue; + } + + @Override + public void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException { + + } + + @Override + public void setHashKey() { + + } + + @Override + public KeyWrapper copyKey() { + return new TestKeyWrapper(this.keyValue); + } + + @Override + public void copyKey(KeyWrapper oldWrapper) { + + } + + @Override + public Object[] getKeyArray() { + return new Object[0]; + } + + @Override + public boolean isCopy() { + return false; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TestKeyWrapper that = (TestKeyWrapper) o; + return keyValue == that.keyValue; + } + + @Override + public int hashCode() { + return Objects.hash(keyValue); + } + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java index 51d2382278..a3e43ceb38 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java @@ -182,4 +182,4 @@ public void testmapWhenKeyCountsMismatch() { assertThat(commonPrefix.getMappedOrder(), is("+")); assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0)); } -} +} \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_in.q ql/src/test/queries/clientpositive/subquery_in.q index a5b3ce7951..96ed1bae41 100644 --- ql/src/test/queries/clientpositive/subquery_in.q +++ ql/src/test/queries/clientpositive/subquery_in.q @@ -3,6 +3,7 @@ --! qt:dataset:lineitem set hive.mapred.mode=nonstrict; set hive.explain.user=false; +set hive.optimize.topnkey=false; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/subquery_notin.q ql/src/test/queries/clientpositive/subquery_notin.q index f8636453c2..f25168ab77 100644 --- ql/src/test/queries/clientpositive/subquery_notin.q +++ ql/src/test/queries/clientpositive/subquery_notin.q @@ -2,6 +2,7 @@ --! qt:dataset:part --! qt:dataset:lineitem set hive.mapred.mode=nonstrict; +set hive.optimize.topnkey=false; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/topnkey_windowing.q ql/src/test/queries/clientpositive/topnkey_windowing.q new file mode 100644 index 0000000000..a5352d2d6c --- /dev/null +++ ql/src/test/queries/clientpositive/topnkey_windowing.q @@ -0,0 +1,110 @@ +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=1431655765; +SET hive.vectorized.execution.enabled=false; + + +CREATE TABLE topnkey_windowing (tw_code string, tw_value double); +INSERT INTO topnkey_windowing VALUES + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 104), + ('A', 109), + ('A', 109), + ('A', 103), + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 101), + ('A', 101), + ('A', 114), + ('A', 120), + ('B', 105), + ('B', 106), + ('B', 106), + ('B', NULL), + ('B', 106), + ('A', 107), + ('B', 108), + ('A', 102), + ('B', 110), + (NULL, NULL), + (NULL, 109), + ('A', 109); + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + + +SET hive.optimize.topnkey=true; +EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + + + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +DROP TABLE topnkey_windowing; diff --git ql/src/test/queries/clientpositive/topnkey_windowing_order.q ql/src/test/queries/clientpositive/topnkey_windowing_order.q new file mode 100644 index 0000000000..686d72a3cd --- /dev/null +++ ql/src/test/queries/clientpositive/topnkey_windowing_order.q @@ -0,0 +1,109 @@ +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=1431655765; +SET hive.vectorized.execution.enabled=false; + + +CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double); +INSERT INTO topnkey_windowing VALUES + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 104, 9), + ('A', 'D', 109, 9), + ('A', 'C', 109, 9), + ('A', 'C', 103, 9), + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 101, 9), + ('A', 'D', 101, 9), + ('A', 'D', 114, 9), + ('A', 'D', 120, 9), + ('B', 'E', 105, 9), + ('B', 'E', 106, 9), + ('B', 'E', 106, 9), + ('B', 'E', NULL, NULL), + ('B', 'E', 106, 9), + ('A', 'C', 107, 9), + ('B', 'E', 108, 9), + ('A', 'C', 102, 9), + ('B', 'E', 110, 9), + (NULL, NULL, NULL, NULL), + (NULL, NULL, 109, 9), + ('A', 'D', 109, 9); + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +DROP TABLE topnkey_windowing; diff --git ql/src/test/queries/clientpositive/vector_windowing_streaming.q ql/src/test/queries/clientpositive/vector_windowing_streaming.q index e1011f9949..2f7b628db3 100644 --- ql/src/test/queries/clientpositive/vector_windowing_streaming.q +++ ql/src/test/queries/clientpositive/vector_windowing_streaming.q @@ -5,6 +5,7 @@ SET hive.vectorized.execution.enabled=true; SET hive.vectorized.execution.reduce.enabled=true; set hive.vectorized.execution.ptf.enabled=true; set hive.fetch.task.conversion=none; +set hive.optimize.topnkey=false; drop table over10k_n8; diff --git ql/src/test/queries/clientpositive/windowing_filter.q ql/src/test/queries/clientpositive/windowing_filter.q index 2483c18416..14d0c5a7c8 100644 --- ql/src/test/queries/clientpositive/windowing_filter.q +++ ql/src/test/queries/clientpositive/windowing_filter.q @@ -1,5 +1,6 @@ set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=1431655765; +set hive.optimize.topnkey=false; create table testtable_n1000 (s_state string, ss_net_profit double); diff --git ql/src/test/results/clientpositive/llap/subquery_in.q.out ql/src/test/results/clientpositive/llap/subquery_in.q.out index 4f0e75289f..ea8fe5ea96 100644 --- ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -3434,19 +3434,13 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col3 (type: string) + Reduce Output Operator + key expressions: _col3 (type: string) null sort order: z + sort order: + Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE - top n: 4 - Reduce Output Operator - key expressions: _col3 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 67e349bb59..c24b79db86 100644 --- ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -4512,7 +4512,7 @@ POSTHOOK: Input: default@part 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand, p_partkey limit 4 PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4615,23 +4615,17 @@ STAGE PLANS: Filter Operator predicate: ((_col12 is null or (_col9 = 0L)) and ((_col10 >= _col9) or (_col9 = 0L) or _col12 is not null or _col5 is null) and (_col5 is not null or (_col9 = 0L) or _col12 is not null)) (type: boolean) Statistics: Num rows: 33 Data size: 20987 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col3 (type: string), _col0 (type: int) - null sort order: zz - Statistics: Num rows: 33 Data size: 20987 Basic stats: COMPLETE Column stats: COMPLETE - top n: 4 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string), _col0 (type: int) + null sort order: zz + sort order: ++ Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: string), _col0 (type: int) - null sort order: zz - sort order: ++ - Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: @@ -4727,7 +4721,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand, p_partkey limit 4 PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git ql/src/test/results/clientpositive/llap/topnkey_grouping_sets.q.out ql/src/test/results/clientpositive/llap/topnkey_grouping_sets.q.out index 41a8c3a5b2..56738c4eff 100644 --- ql/src/test/results/clientpositive/llap/topnkey_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/topnkey_grouping_sets.q.out @@ -267,11 +267,11 @@ POSTHOOK: Input: default@t_test_grouping_sets 5 NULL 6 NULL 6 2 -7 8 7 NULL +7 8 10 11 10 NULL -NULL 2 +NULL 1 PREHOOK: query: SELECT a, b FROM t_test_grouping_sets GROUP BY a,b GROUPING SETS ((a,b), (a), (b), ()) ORDER BY a LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@t_test_grouping_sets diff --git ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_order.q.out ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_order.q.out index d6d76fb729..191d34ad53 100644 --- ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_order.q.out +++ ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_order.q.out @@ -143,12 +143,12 @@ POSTHOOK: query: SELECT a, b FROM t_test_grouping_sets GROUP BY a,b GROUPING SET POSTHOOK: type: QUERY POSTHOOK: Input: default@t_test_grouping_sets #### A masked pattern was here #### -10 NULL -NULL NULL -NULL NULL -NULL NULL 5 NULL 6 NULL +7 NULL +NULL NULL +10 NULL +NULL NULL NULL NULL PREHOOK: query: SELECT a, b FROM t_test_grouping_sets GROUP BY a,b GROUPING SETS ((a,b), (a), (b), ()) ORDER BY b NULLS FIRST LIMIT 7 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out new file mode 100644 index 0000000000..52ba490c01 --- /dev/null +++ ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out @@ -0,0 +1,626 @@ +PREHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@topnkey_windowing +PREHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 104), + ('A', 109), + ('A', 109), + ('A', 103), + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 101), + ('A', 101), + ('A', 114), + ('A', 120), + ('B', 105), + ('B', 106), + ('B', 106), + ('B', NULL), + ('B', 106), + ('A', 107), + ('B', 108), + ('A', 102), + ('B', 110), + (NULL, NULL), + (NULL, 109), + ('A', 109) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 104), + ('A', 109), + ('A', 109), + ('A', 103), + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 101), + ('A', 101), + ('A', 114), + ('A', 120), + ('B', 105), + ('B', 106), + ('B', 106), + ('B', NULL), + ('B', 106), + ('A', 107), + ('B', 108), + ('A', 102), + ('B', 110), + (NULL, NULL), + (NULL, 109), + ('A', 109) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@topnkey_windowing +POSTHOOK: Lineage: topnkey_windowing.tw_code SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_value SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: tw_code (type: string), tw_value (type: double) + null sort order: az + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_code (type: string), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT * +FROM (SELECT `tw_code`, RANK() OVER (PARTITION BY 0 ORDER BY `tw_value` ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING) AS `rank_window_0` +FROM `default`.`topnkey_windowing`) AS `t` +WHERE `rank_window_0` <= 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Top N Key Operator + sort order: + + keys: tw_value (type: double) + null sort order: z + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: 0 (type: int), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + TopN: 4 + TopN Hash Memory Usage: 0.1 + value expressions: tw_code (type: string) + auto parallelism: true + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: topnkey_windowing + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.topnkey_windowing + name: default.topnkey_windowing + Truncated Path -> Alias: + /topnkey_windowing [topnkey_windowing] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + isSamplingPred: false + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +A 1 +A 1 +A 3 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +A 1 +A 1 +A 3 +PREHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: tw_code (type: string), tw_value (type: double) + null sort order: az + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_code (type: string), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: dense_rank_window_0 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (dense_rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), dense_rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +NULL 2 +NULL 2 +NULL 2 +A 1 +A 1 +A 2 +A 3 +B 1 +B 2 +B 2 +B 2 +B 3 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +NULL 2 +NULL 2 +NULL 2 +A 1 +A 1 +A 2 +A 3 +B 1 +B 2 +B 2 +B 2 +B 3 +PREHOOK: query: DROP TABLE topnkey_windowing +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@topnkey_windowing +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: DROP TABLE topnkey_windowing +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@topnkey_windowing +POSTHOOK: Output: default@topnkey_windowing diff --git ql/src/test/results/clientpositive/llap/topnkey_windowing_order.q.out ql/src/test/results/clientpositive/llap/topnkey_windowing_order.q.out new file mode 100644 index 0000000000..9721705b26 --- /dev/null +++ ql/src/test/results/clientpositive/llap/topnkey_windowing_order.q.out @@ -0,0 +1,568 @@ +PREHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@topnkey_windowing +PREHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 104, 9), + ('A', 'D', 109, 9), + ('A', 'C', 109, 9), + ('A', 'C', 103, 9), + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 101, 9), + ('A', 'D', 101, 9), + ('A', 'D', 114, 9), + ('A', 'D', 120, 9), + ('B', 'E', 105, 9), + ('B', 'E', 106, 9), + ('B', 'E', 106, 9), + ('B', 'E', NULL, NULL), + ('B', 'E', 106, 9), + ('A', 'C', 107, 9), + ('B', 'E', 108, 9), + ('A', 'C', 102, 9), + ('B', 'E', 110, 9), + (NULL, NULL, NULL, NULL), + (NULL, NULL, 109, 9), + ('A', 'D', 109, 9) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 104, 9), + ('A', 'D', 109, 9), + ('A', 'C', 109, 9), + ('A', 'C', 103, 9), + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 101, 9), + ('A', 'D', 101, 9), + ('A', 'D', 114, 9), + ('A', 'D', 120, 9), + ('B', 'E', 105, 9), + ('B', 'E', 106, 9), + ('B', 'E', 106, 9), + ('B', 'E', NULL, NULL), + ('B', 'E', 106, 9), + ('A', 'C', 107, 9), + ('B', 'E', 108, 9), + ('A', 'C', 102, 9), + ('B', 'E', 110, 9), + (NULL, NULL, NULL, NULL), + (NULL, NULL, 109, 9), + ('A', 'D', 109, 9) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@topnkey_windowing +POSTHOOK: Lineage: topnkey_windowing.tw_a SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_b SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_v1 SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_v2 SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: tw_a (type: string), tw_v1 (type: double) + null sort order: aa + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_a (type: string), tw_v1 (type: double) + null sort order: aa + sort order: ++ + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 3 +B 3 +B 3 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 3 +B 3 +B 3 +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++- + keys: tw_a (type: string), tw_v1 (type: double), tw_v2 (type: double) + null sort order: aza + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_a (type: string), tw_v1 (type: double), tw_v2 (type: double) + null sort order: aza + sort order: ++- + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: double) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 26 Data size: 8547 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double, _col3: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST, _col3 DESC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2, _col3 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8547 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2378 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: +++ + keys: tw_a (type: string), tw_b (type: string), tw_v1 (type: double) + null sort order: aaz + Map-reduce partition columns: tw_a (type: string), tw_b (type: string) + Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_a (type: string), tw_b (type: string), tw_v1 (type: double) + null sort order: aaz + sort order: +++ + Map-reduce partition columns: tw_a (type: string), tw_b (type: string) + Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 10010 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col0, _col1 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 10010 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 2 +NULL 2 +NULL 2 +NULL 1 +NULL 1 +A 1 +A 2 +A 3 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 2 +NULL 2 +NULL 2 +NULL 1 +NULL 1 +A 1 +A 2 +A 3 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: DROP TABLE topnkey_windowing +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@topnkey_windowing +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: DROP TABLE topnkey_windowing +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@topnkey_windowing +POSTHOOK: Output: default@topnkey_windowing diff --git ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out index 786bbe12c8..c1340962fb 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out @@ -19,7 +19,7 @@ POSTHOOK: type: CREATE_MATERIALIZED_VIEW POSTHOOK: Input: default@store_sales POSTHOOK: Output: database:default POSTHOOK: Output: default@mv_store_sales_item_customer -Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -112,10 +112,10 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_147] - Limit [LIM_146] (rows=100 width=218) + File Output Operator [FS_149] + Limit [LIM_148] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_145] (rows=6951 width=218) + Select Operator [SEL_147] (rows=6951 width=218) Output:["_col0","_col1","_col2"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_70] @@ -123,100 +123,104 @@ Stage-0 Output:["_col0","_col1","_col2"] Top N Key Operator [TNK_99] (rows=6951 width=218) keys:_col1,top n:100 - Merge Join Operator [MERGEJOIN_114] (rows=6951 width=218) - Conds:RS_66._col2=RS_144._col0(Inner),Output:["_col1","_col5","_col7"] + Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218) + Conds:RS_66._col2=RS_146._col0(Inner),Output:["_col1","_col5","_col7"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_142] (rows=462000 width=111) + Select Operator [SEL_144] (rows=462000 width=111) Output:["_col0","_col1"] TableScan [TS_56] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_113] (rows=6951 width=115) - Conds:RS_63._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col5"] + Merge Join Operator [MERGEJOIN_115] (rows=6951 width=115) + Conds:RS_63._col0=RS_145._col0(Inner),Output:["_col1","_col2","_col5"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] + SHUFFLE [RS_145] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_142] + Please refer to the previous Select Operator [SEL_144] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_112] (rows=6951 width=12) - Conds:RS_136._col1=RS_141._col1(Inner),Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_114] (rows=6951 width=12) + Conds:RS_138._col1=RS_143._col1(Inner),Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] + SHUFFLE [RS_138] PartitionCols:_col1 - Select Operator [SEL_135] (rows=6951 width=8) + Select Operator [SEL_137] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_134] (rows=6951 width=116) + Filter Operator [FIL_136] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_133] (rows=20854 width=116) + PTF Operator [PTF_135] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_132] (rows=20854 width=116) + Select Operator [SEL_134] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:0 - Filter Operator [FIL_20] (rows=20854 width=228) - predicate:(_col1 > (0.9 * _col2)) - Merge Join Operator [MERGEJOIN_110] (rows=62562 width=228) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_131] - Select Operator [SEL_130] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_129] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_128] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_127] (rows=1 width=124) + Top N Key Operator [TNK_100] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Filter Operator [FIL_20] (rows=20854 width=228) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_133] + Select Operator [SEL_132] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_131] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_130] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_129] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Group By Operator [GBY_127] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_126] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_125] (rows=287946 width=114) + predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) + TableScan [TS_8] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_124] + Select Operator [SEL_123] (rows=62562 width=116) + Output:["_col0","_col1"] + Filter Operator [FIL_122] (rows=62562 width=124) + predicate:(_col1 is not null and _col2 is not null) + Group By Operator [GBY_121] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] PartitionCols:_col0 - Group By Operator [GBY_125] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_124] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_123] (rows=287946 width=114) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_8] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_122] - Select Operator [SEL_121] (rows=62562 width=116) - Output:["_col0","_col1"] - Filter Operator [FIL_120] (rows=62562 width=124) - predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_119] (rows=62562 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] - PartitionCols:_col0 - Group By Operator [GBY_117] (rows=3199976 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_116] (rows=6399952 width=114) - Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_115] (rows=6399952 width=114) - predicate:(ss_store_sk = 410) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] + Group By Operator [GBY_119] (rows=3199976 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk + Select Operator [SEL_118] (rows=6399952 width=114) + Output:["ss_item_sk","ss_net_profit"] + Filter Operator [FIL_117] (rows=6399952 width=114) + predicate:(ss_store_sk = 410) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_143] PartitionCols:_col1 - Select Operator [SEL_140] (rows=6951 width=8) + Select Operator [SEL_142] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_139] (rows=6951 width=116) + Filter Operator [FIL_141] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_138] (rows=20854 width=116) + PTF Operator [PTF_140] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_137] (rows=20854 width=116) + Select Operator [SEL_139] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_20] + Top N Key Operator [TNK_101] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Please refer to the previous Filter Operator [FIL_20] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out index eace7a0685..8a201c2ef5 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -91,10 +91,10 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_147] - Limit [LIM_146] (rows=100 width=218) + File Output Operator [FS_149] + Limit [LIM_148] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_145] (rows=6951 width=218) + Select Operator [SEL_147] (rows=6951 width=218) Output:["_col0","_col1","_col2"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_70] @@ -102,100 +102,104 @@ Stage-0 Output:["_col0","_col1","_col2"] Top N Key Operator [TNK_99] (rows=6951 width=218) keys:_col1,top n:100 - Merge Join Operator [MERGEJOIN_114] (rows=6951 width=218) - Conds:RS_66._col2=RS_144._col0(Inner),Output:["_col1","_col5","_col7"] + Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218) + Conds:RS_66._col2=RS_146._col0(Inner),Output:["_col1","_col5","_col7"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_142] (rows=462000 width=111) + Select Operator [SEL_144] (rows=462000 width=111) Output:["_col0","_col1"] TableScan [TS_56] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_113] (rows=6951 width=115) - Conds:RS_63._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col5"] + Merge Join Operator [MERGEJOIN_115] (rows=6951 width=115) + Conds:RS_63._col0=RS_145._col0(Inner),Output:["_col1","_col2","_col5"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] + SHUFFLE [RS_145] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_142] + Please refer to the previous Select Operator [SEL_144] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_112] (rows=6951 width=12) - Conds:RS_136._col1=RS_141._col1(Inner),Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_114] (rows=6951 width=12) + Conds:RS_138._col1=RS_143._col1(Inner),Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] + SHUFFLE [RS_138] PartitionCols:_col1 - Select Operator [SEL_135] (rows=6951 width=8) + Select Operator [SEL_137] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_134] (rows=6951 width=116) + Filter Operator [FIL_136] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_133] (rows=20854 width=116) + PTF Operator [PTF_135] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_132] (rows=20854 width=116) + Select Operator [SEL_134] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:0 - Filter Operator [FIL_20] (rows=20854 width=228) - predicate:(_col1 > (0.9 * _col2)) - Merge Join Operator [MERGEJOIN_110] (rows=62562 width=228) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_131] - Select Operator [SEL_130] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_129] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_128] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_127] (rows=1 width=124) + Top N Key Operator [TNK_100] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Filter Operator [FIL_20] (rows=20854 width=228) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_133] + Select Operator [SEL_132] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_131] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_130] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_129] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Group By Operator [GBY_127] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_126] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_125] (rows=287946 width=114) + predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) + TableScan [TS_8] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_124] + Select Operator [SEL_123] (rows=62562 width=116) + Output:["_col0","_col1"] + Filter Operator [FIL_122] (rows=62562 width=124) + predicate:(_col1 is not null and _col2 is not null) + Group By Operator [GBY_121] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] PartitionCols:_col0 - Group By Operator [GBY_125] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_124] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_123] (rows=287946 width=114) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_8] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_122] - Select Operator [SEL_121] (rows=62562 width=116) - Output:["_col0","_col1"] - Filter Operator [FIL_120] (rows=62562 width=124) - predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_119] (rows=62562 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] - PartitionCols:_col0 - Group By Operator [GBY_117] (rows=3199976 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_116] (rows=6399952 width=114) - Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_115] (rows=6399952 width=114) - predicate:(ss_store_sk = 410) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] + Group By Operator [GBY_119] (rows=3199976 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk + Select Operator [SEL_118] (rows=6399952 width=114) + Output:["ss_item_sk","ss_net_profit"] + Filter Operator [FIL_117] (rows=6399952 width=114) + predicate:(ss_store_sk = 410) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_143] PartitionCols:_col1 - Select Operator [SEL_140] (rows=6951 width=8) + Select Operator [SEL_142] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_139] (rows=6951 width=116) + Filter Operator [FIL_141] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_138] (rows=20854 width=116) + PTF Operator [PTF_140] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_137] (rows=20854 width=116) + Select Operator [SEL_139] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_20] + Top N Key Operator [TNK_101] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Please refer to the previous Filter Operator [FIL_20] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out index 9ed7306c46..f5fe4b0ff3 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out @@ -111,88 +111,90 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_109] - Limit [LIM_108] (rows=100 width=617) + File Output Operator [FS_111] + Limit [LIM_110] (rows=100 width=617) Number of rows:100 - Select Operator [SEL_107] (rows=1575989691 width=617) + Select Operator [SEL_109] (rows=1575989691 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_106] - Select Operator [SEL_105] (rows=1575989691 width=617) + SHUFFLE [RS_108] + Select Operator [SEL_107] (rows=1575989691 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Top N Key Operator [TNK_104] (rows=1575989691 width=613) + Top N Key Operator [TNK_106] (rows=1575989691 width=613) keys:_col6, _col5, _col4, _col7, _col0, _col2, _col1, _col3, _col8, rank_window_0,top n:100 - Filter Operator [FIL_103] (rows=1575989691 width=613) + Filter Operator [FIL_105] (rows=1575989691 width=613) predicate:(rank_window_0 <= 100) - PTF Operator [PTF_102] (rows=4727969073 width=613) + PTF Operator [PTF_104] (rows=4727969073 width=613) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col8 DESC NULLS LAST","partition by:":"_col6"}] - Select Operator [SEL_101] (rows=4727969073 width=613) + Select Operator [SEL_103] (rows=4727969073 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_100] + SHUFFLE [RS_102] PartitionCols:_col6 - Select Operator [SEL_99] (rows=4727969073 width=613) + Select Operator [SEL_101] (rows=4727969073 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_98] (rows=4727969073 width=621) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Group By Operator [GBY_20] (rows=4727969073 width=621) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col9, _col11, _col12, _col13, _col14, 0L - Merge Join Operator [MERGEJOIN_82] (rows=525329897 width=613) - Conds:RS_16._col1=RS_97._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_97] - PartitionCols:_col0 - Select Operator [SEL_96] (rows=462000 width=393) - Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_8] (rows=462000 width=393) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_81] (rows=525329897 width=228) - Conds:RS_13._col2=RS_95._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_95] - PartitionCols:_col0 - Select Operator [SEL_94] (rows=1704 width=104) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=1704 width=104) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_80] (rows=525329897 width=131) - Conds:RS_93._col0=RS_85._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_85] - PartitionCols:_col0 - Select Operator [SEL_84] (rows=317 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_83] (rows=317 width=20) - predicate:d_month_seq BETWEEN 1212 AND 1223 - TableScan [TS_3] (rows=73049 width=20) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_93] - PartitionCols:_col0 - Select Operator [SEL_92] (rows=525329897 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_91] (rows=525329897 width=122) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_11_date_dim_d_date_sk_min) AND DynamicValue(RS_11_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_11_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_90] - Group By Operator [GBY_89] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_88] - Group By Operator [GBY_87] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_86] (rows=317 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_84] + Top N Key Operator [TNK_100] (rows=4727969073 width=621) + PartitionCols:_col6,keys:_col6, _col9,top n:101 + Group By Operator [GBY_99] (rows=4727969073 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Group By Operator [GBY_20] (rows=4727969073 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col9, _col11, _col12, _col13, _col14, 0L + Merge Join Operator [MERGEJOIN_83] (rows=525329897 width=613) + Conds:RS_16._col1=RS_98._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_98] + PartitionCols:_col0 + Select Operator [SEL_97] (rows=462000 width=393) + Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_8] (rows=462000 width=393) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_82] (rows=525329897 width=228) + Conds:RS_13._col2=RS_96._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_96] + PartitionCols:_col0 + Select Operator [SEL_95] (rows=1704 width=104) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_81] (rows=525329897 width=131) + Conds:RS_94._col0=RS_86._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_86] + PartitionCols:_col0 + Select Operator [SEL_85] (rows=317 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_84] (rows=317 width=20) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_3] (rows=73049 width=20) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_94] + PartitionCols:_col0 + Select Operator [SEL_93] (rows=525329897 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_92] (rows=525329897 width=122) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_11_date_dim_d_date_sk_min) AND DynamicValue(RS_11_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_11_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_91] + Group By Operator [GBY_90] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_89] + Group By Operator [GBY_88] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_87] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_85] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out index 40dfaa2f13..b2bde8d48c 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out @@ -100,27 +100,27 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_172] - Limit [LIM_171] (rows=100 width=492) + File Output Operator [FS_174] + Limit [LIM_173] (rows=100 width=492) Number of rows:100 - Select Operator [SEL_170] (rows=720 width=492) + Select Operator [SEL_172] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_169] - Select Operator [SEL_168] (rows=720 width=492) + SHUFFLE [RS_171] + Select Operator [SEL_170] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_167] (rows=720 width=304) + Top N Key Operator [TNK_169] (rows=720 width=304) keys:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN (((grouping(_col3, 1L) + grouping(_col3, 0L)) = 0L)) THEN (_col0) ELSE (null) END, rank_window_0,top n:100 - PTF Operator [PTF_166] (rows=720 width=304) + PTF Operator [PTF_168] (rows=720 width=304) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_165] (rows=720 width=304) + Select Operator [SEL_167] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_166] PartitionCols:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_163] (rows=720 width=304) + Select Operator [SEL_165] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_162] (rows=720 width=304) + Group By Operator [GBY_164] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_50] @@ -129,90 +129,92 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L Select Operator [SEL_47] (rows=525329897 width=290) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_137] (rows=525329897 width=290) + Merge Join Operator [MERGEJOIN_138] (rows=525329897 width=290) Conds:RS_44._col1=RS_45._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_133] (rows=525329897 width=110) - Conds:RS_148._col0=RS_140._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_134] (rows=525329897 width=110) + Conds:RS_149._col0=RS_141._col0(Inner),Output:["_col1","_col2"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_139] (rows=317 width=8) + Select Operator [SEL_140] (rows=317 width=8) Output:["_col0"] - Filter Operator [FIL_138] (rows=317 width=8) + Filter Operator [FIL_139] (rows=317 width=8) predicate:d_month_seq BETWEEN 1212 AND 1223 TableScan [TS_3] (rows=73049 width=8) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] + SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_147] (rows=525329897 width=114) + Select Operator [SEL_148] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_146] (rows=525329897 width=114) + Filter Operator [FIL_147] (rows=525329897 width=114) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_42_d1_d_date_sk_min) AND DynamicValue(RS_42_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_42_d1_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_145] - Group By Operator [GBY_144] (rows=1 width=12) + BROADCAST [RS_146] + Group By Operator [GBY_145] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - Group By Operator [GBY_142] (rows=1 width=12) + SHUFFLE [RS_144] + Group By Operator [GBY_143] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_141] (rows=317 width=4) + Select Operator [SEL_142] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_139] + Please refer to the previous Select Operator [SEL_140] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_136] (rows=556 width=188) - Conds:RS_161._col2=RS_158._col0(Inner),Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_137] (rows=556 width=188) + Conds:RS_163._col2=RS_160._col0(Inner),Output:["_col0","_col1","_col2"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_161] + SHUFFLE [RS_163] PartitionCols:_col2 - Select Operator [SEL_160] (rows=1704 width=188) + Select Operator [SEL_162] (rows=1704 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_159] (rows=1704 width=188) + Filter Operator [FIL_161] (rows=1704 width=188) predicate:s_state is not null TableScan [TS_6] (rows=1704 width=188) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county","s_state"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_158] + SHUFFLE [RS_160] PartitionCols:_col0 - Select Operator [SEL_157] (rows=16 width=86) + Select Operator [SEL_159] (rows=16 width=86) Output:["_col0"] - Filter Operator [FIL_156] (rows=16 width=198) + Filter Operator [FIL_158] (rows=16 width=198) predicate:(rank_window_0 <= 5) - PTF Operator [PTF_155] (rows=49 width=198) + PTF Operator [PTF_157] (rows=49 width=198) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] - Select Operator [SEL_154] (rows=49 width=198) + Select Operator [SEL_156] (rows=49 width=198) Output:["_col0","_col1"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_155] PartitionCols:_col0 - Group By Operator [GBY_152] (rows=49 width=198) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Group By Operator [GBY_25] (rows=19404 width=198) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_135] (rows=525329897 width=192) - Conds:RS_21._col1=RS_151._col0(Inner),Output:["_col2","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_133] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] - PartitionCols:_col0 - Select Operator [SEL_150] (rows=1704 width=90) - Output:["_col0","_col1"] - Filter Operator [FIL_149] (rows=1704 width=90) - predicate:s_state is not null - TableScan [TS_15] (rows=1704 width=90) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + Top N Key Operator [TNK_154] (rows=49 width=198) + PartitionCols:_col0,keys:_col0, _col1,top n:6 + Group By Operator [GBY_153] (rows=49 width=198) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Group By Operator [GBY_25] (rows=19404 width=198) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_136] (rows=525329897 width=192) + Conds:RS_21._col1=RS_152._col0(Inner),Output:["_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Please refer to the previous Merge Join Operator [MERGEJOIN_134] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col0 + Select Operator [SEL_151] (rows=1704 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_150] (rows=1704 width=90) + predicate:s_state is not null + TableScan [TS_15] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] diff --git ql/src/test/results/clientpositive/perf/tez/query44.q.out ql/src/test/results/clientpositive/perf/tez/query44.q.out index a0defab8b3..646bb9341e 100644 --- ql/src/test/results/clientpositive/perf/tez/query44.q.out +++ ql/src/test/results/clientpositive/perf/tez/query44.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -91,10 +91,10 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_148] - Limit [LIM_147] (rows=100 width=218) + File Output Operator [FS_150] + Limit [LIM_149] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_146] (rows=6951 width=218) + Select Operator [SEL_148] (rows=6951 width=218) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_73] @@ -102,102 +102,106 @@ Stage-0 Output:["_col0","_col1","_col2"] Top N Key Operator [TNK_99] (rows=6951 width=218) keys:_col3,top n:100 - Merge Join Operator [MERGEJOIN_114] (rows=6951 width=218) + Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218) Conds:RS_69._col3=RS_70._col3(Inner),Output:["_col1","_col3","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_111] (rows=6951 width=111) - Conds:RS_117._col0=RS_140._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_113] (rows=6951 width=111) + Conds:RS_119._col0=RS_142._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + SHUFFLE [RS_119] PartitionCols:_col0 - Select Operator [SEL_116] (rows=462000 width=111) + Select Operator [SEL_118] (rows=462000 width=111) Output:["_col0","_col1"] - Filter Operator [FIL_115] (rows=462000 width=111) + Filter Operator [FIL_117] (rows=462000 width=111) predicate:i_item_sk is not null TableScan [TS_0] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] + SHUFFLE [RS_142] PartitionCols:_col0 - Select Operator [SEL_139] (rows=6951 width=8) + Select Operator [SEL_141] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_138] (rows=6951 width=116) + Filter Operator [FIL_140] (rows=6951 width=116) predicate:((rank_window_0 < 11) and _col0 is not null) - PTF Operator [PTF_137] (rows=20854 width=116) + PTF Operator [PTF_139] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_136] (rows=20854 width=116) + Select Operator [SEL_138] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:0 - Filter Operator [FIL_23] (rows=20854 width=228) - predicate:(_col1 > (0.9 * _col2)) - Merge Join Operator [MERGEJOIN_110] (rows=62562 width=228) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_135] - Select Operator [SEL_134] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_133] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_132] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_131] (rows=1 width=124) + Top N Key Operator [TNK_100] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Filter Operator [FIL_23] (rows=20854 width=228) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_137] + Select Operator [SEL_136] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_135] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_134] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_133] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + PartitionCols:_col0 + Group By Operator [GBY_131] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_130] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_129] (rows=287946 width=114) + predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) + TableScan [TS_11] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_128] + Select Operator [SEL_127] (rows=62562 width=116) + Output:["_col0","_col1"] + Filter Operator [FIL_126] (rows=62562 width=124) + predicate:(_col1 is not null and _col2 is not null) + Group By Operator [GBY_125] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] PartitionCols:_col0 - Group By Operator [GBY_129] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_128] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_127] (rows=287946 width=114) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_11] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_126] - Select Operator [SEL_125] (rows=62562 width=116) - Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=62562 width=124) - predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_123] (rows=62562 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] - PartitionCols:_col0 - Group By Operator [GBY_121] (rows=3199976 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_120] (rows=6399952 width=114) - Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_119] (rows=6399952 width=114) - predicate:(ss_store_sk = 410) - TableScan [TS_3] (rows=575995635 width=114) - default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] + Group By Operator [GBY_123] (rows=3199976 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk + Select Operator [SEL_122] (rows=6399952 width=114) + Output:["ss_item_sk","ss_net_profit"] + Filter Operator [FIL_121] (rows=6399952 width=114) + predicate:(ss_store_sk = 410) + TableScan [TS_3] (rows=575995635 width=114) + default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_70] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_113] (rows=6951 width=111) - Conds:RS_118._col0=RS_145._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_115] (rows=6951 width=111) + Conds:RS_120._col0=RS_147._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + SHUFFLE [RS_120] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_116] + Please refer to the previous Select Operator [SEL_118] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_145] + SHUFFLE [RS_147] PartitionCols:_col0 - Select Operator [SEL_144] (rows=6951 width=8) + Select Operator [SEL_146] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_143] (rows=6951 width=116) + Filter Operator [FIL_145] (rows=6951 width=116) predicate:((rank_window_0 < 11) and _col0 is not null) - PTF Operator [PTF_142] (rows=20854 width=116) + PTF Operator [PTF_144] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_141] (rows=20854 width=116) + Select Operator [SEL_143] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_23] + Top N Key Operator [TNK_101] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Please refer to the previous Filter Operator [FIL_23] diff --git ql/src/test/results/clientpositive/perf/tez/query67.q.out ql/src/test/results/clientpositive/perf/tez/query67.q.out index 54d76e9c1a..bd225cdf87 100644 --- ql/src/test/results/clientpositive/perf/tez/query67.q.out +++ ql/src/test/results/clientpositive/perf/tez/query67.q.out @@ -111,92 +111,94 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_113] - Limit [LIM_112] (rows=100 width=617) + File Output Operator [FS_115] + Limit [LIM_114] (rows=100 width=617) Number of rows:100 - Select Operator [SEL_111] (rows=1575989691 width=617) + Select Operator [SEL_113] (rows=1575989691 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] - Select Operator [SEL_109] (rows=1575989691 width=617) + SHUFFLE [RS_112] + Select Operator [SEL_111] (rows=1575989691 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Top N Key Operator [TNK_108] (rows=1575989691 width=613) + Top N Key Operator [TNK_110] (rows=1575989691 width=613) keys:_col2, _col1, _col0, _col3, _col4, _col6, _col5, _col7, _col8, rank_window_0,top n:100 - Filter Operator [FIL_107] (rows=1575989691 width=613) + Filter Operator [FIL_109] (rows=1575989691 width=613) predicate:(rank_window_0 <= 100) - PTF Operator [PTF_106] (rows=4727969073 width=613) + PTF Operator [PTF_108] (rows=4727969073 width=613) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col8 DESC NULLS LAST","partition by:":"_col2"}] - Select Operator [SEL_105] (rows=4727969073 width=613) + Select Operator [SEL_107] (rows=4727969073 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_104] + SHUFFLE [RS_106] PartitionCols:_col2 - Select Operator [SEL_103] (rows=4727969073 width=613) + Select Operator [SEL_105] (rows=4727969073 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_102] (rows=4727969073 width=621) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Group By Operator [GBY_22] (rows=4727969073 width=621) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col11, _col12, _col13, _col14, _col5, _col6, _col7, _col9, 0L - Merge Join Operator [MERGEJOIN_84] (rows=525329897 width=613) - Conds:RS_18._col1=RS_101._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_101] - PartitionCols:_col0 - Select Operator [SEL_100] (rows=462000 width=393) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_99] (rows=462000 width=393) - predicate:i_item_sk is not null - TableScan [TS_9] (rows=462000 width=393) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_83] (rows=525329897 width=228) - Conds:RS_15._col2=RS_98._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_98] - PartitionCols:_col0 - Select Operator [SEL_97] (rows=1704 width=104) - Output:["_col0","_col1"] - Filter Operator [FIL_96] (rows=1704 width=104) - predicate:s_store_sk is not null - TableScan [TS_6] (rows=1704 width=104) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_82] (rows=525329897 width=131) - Conds:RS_95._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_87] - PartitionCols:_col0 - Select Operator [SEL_86] (rows=317 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_85] (rows=317 width=20) - predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=20) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_95] - PartitionCols:_col0 - Select Operator [SEL_94] (rows=525329897 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_93] (rows=525329897 width=122) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_92] - Group By Operator [GBY_91] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_90] - Group By Operator [GBY_89] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=317 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_86] + Top N Key Operator [TNK_104] (rows=4727969073 width=621) + PartitionCols:_col2,keys:_col2, _col9,top n:101 + Group By Operator [GBY_103] (rows=4727969073 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Group By Operator [GBY_22] (rows=4727969073 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col11, _col12, _col13, _col14, _col5, _col6, _col7, _col9, 0L + Merge Join Operator [MERGEJOIN_85] (rows=525329897 width=613) + Conds:RS_18._col1=RS_102._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_101] (rows=462000 width=393) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_100] (rows=462000 width=393) + predicate:i_item_sk is not null + TableScan [TS_9] (rows=462000 width=393) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_84] (rows=525329897 width=228) + Conds:RS_15._col2=RS_99._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_99] + PartitionCols:_col0 + Select Operator [SEL_98] (rows=1704 width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_97] (rows=1704 width=104) + predicate:s_store_sk is not null + TableScan [TS_6] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_83] (rows=525329897 width=131) + Conds:RS_96._col0=RS_88._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_88] + PartitionCols:_col0 + Select Operator [SEL_87] (rows=317 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_86] (rows=317 width=20) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=20) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_96] + PartitionCols:_col0 + Select Operator [SEL_95] (rows=525329897 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_94] (rows=525329897 width=122) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_93] + Group By Operator [GBY_92] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_91] + Group By Operator [GBY_90] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_89] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_87] diff --git ql/src/test/results/clientpositive/perf/tez/query70.q.out ql/src/test/results/clientpositive/perf/tez/query70.q.out index 23f9166a4f..6107ec0367 100644 --- ql/src/test/results/clientpositive/perf/tez/query70.q.out +++ ql/src/test/results/clientpositive/perf/tez/query70.q.out @@ -100,27 +100,27 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_170] - Limit [LIM_169] (rows=100 width=492) + File Output Operator [FS_172] + Limit [LIM_171] (rows=100 width=492) Number of rows:100 - Select Operator [SEL_168] (rows=720 width=492) + Select Operator [SEL_170] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] - Select Operator [SEL_166] (rows=720 width=492) + SHUFFLE [RS_169] + Select Operator [SEL_168] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_165] (rows=720 width=304) + Top N Key Operator [TNK_167] (rows=720 width=304) keys:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN (((grouping(_col3, 1L) + grouping(_col3, 0L)) = 0L)) THEN (_col0) ELSE (null) END, rank_window_0,top n:100 - PTF Operator [PTF_164] (rows=720 width=304) + PTF Operator [PTF_166] (rows=720 width=304) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_163] (rows=720 width=304) + Select Operator [SEL_165] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + SHUFFLE [RS_164] PartitionCols:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_161] (rows=720 width=304) + Select Operator [SEL_163] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_160] (rows=720 width=304) + Group By Operator [GBY_162] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_49] @@ -129,89 +129,91 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L Select Operator [SEL_46] (rows=171536292 width=280) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_135] (rows=171536292 width=280) - Conds:RS_43._col7=RS_159._col0(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_136] (rows=171536292 width=280) + Conds:RS_43._col7=RS_161._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_161] PartitionCols:_col0 - Select Operator [SEL_158] (rows=16 width=86) + Select Operator [SEL_160] (rows=16 width=86) Output:["_col0"] - Filter Operator [FIL_157] (rows=16 width=198) + Filter Operator [FIL_159] (rows=16 width=198) predicate:(rank_window_0 <= 5) - PTF Operator [PTF_156] (rows=49 width=198) + PTF Operator [PTF_158] (rows=49 width=198) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] - Select Operator [SEL_155] (rows=49 width=198) + Select Operator [SEL_157] (rows=49 width=198) Output:["_col0","_col1"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_154] + SHUFFLE [RS_156] PartitionCols:_col0 - Group By Operator [GBY_153] (rows=49 width=198) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Group By Operator [GBY_25] (rows=19404 width=198) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_134] (rows=525329897 width=192) - Conds:RS_21._col1=RS_152._col0(Inner),Output:["_col2","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_131] (rows=525329897 width=110) - Conds:RS_146._col0=RS_138._col0(Inner),Output:["_col1","_col2"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] - PartitionCols:_col0 - Select Operator [SEL_137] (rows=317 width=8) - Output:["_col0"] - Filter Operator [FIL_136] (rows=317 width=8) - predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=8) - default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] - PartitionCols:_col0 - Select Operator [SEL_145] (rows=525329897 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_144] (rows=525329897 width=114) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_143] - Group By Operator [GBY_142] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] - Group By Operator [GBY_140] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_139] (rows=317 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_137] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] - PartitionCols:_col0 - Select Operator [SEL_151] (rows=1704 width=90) - Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=1704 width=90) - predicate:(s_store_sk is not null and s_state is not null) - TableScan [TS_15] (rows=1704 width=90) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + Top N Key Operator [TNK_155] (rows=49 width=198) + PartitionCols:_col0,keys:_col0, _col1,top n:6 + Group By Operator [GBY_154] (rows=49 width=198) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Group By Operator [GBY_25] (rows=19404 width=198) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_135] (rows=525329897 width=192) + Conds:RS_21._col1=RS_153._col0(Inner),Output:["_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_132] (rows=525329897 width=110) + Conds:RS_147._col0=RS_139._col0(Inner),Output:["_col1","_col2"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_139] + PartitionCols:_col0 + Select Operator [SEL_138] (rows=317 width=8) + Output:["_col0"] + Filter Operator [FIL_137] (rows=317 width=8) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + PartitionCols:_col0 + Select Operator [SEL_146] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_145] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_144] + Group By Operator [GBY_143] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] + Group By Operator [GBY_141] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_140] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_138] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] + PartitionCols:_col0 + Select Operator [SEL_152] (rows=1704 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_151] (rows=1704 width=90) + predicate:(s_store_sk is not null and s_state is not null) + TableScan [TS_15] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_132] (rows=525329897 width=290) - Conds:RS_40._col1=RS_149._col0(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_133] (rows=525329897 width=290) + Conds:RS_40._col1=RS_150._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_131] + Please refer to the previous Merge Join Operator [MERGEJOIN_132] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_148] (rows=1704 width=188) + Select Operator [SEL_149] (rows=1704 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_147] (rows=1704 width=188) + Filter Operator [FIL_148] (rows=1704 width=188) predicate:(s_state is not null and s_store_sk is not null) TableScan [TS_6] (rows=1704 width=188) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county","s_state"] diff --git ql/src/test/results/clientpositive/topnkey_windowing.q.out ql/src/test/results/clientpositive/topnkey_windowing.q.out new file mode 100644 index 0000000000..c186790bea --- /dev/null +++ ql/src/test/results/clientpositive/topnkey_windowing.q.out @@ -0,0 +1,576 @@ +PREHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@topnkey_windowing +PREHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 104), + ('A', 109), + ('A', 109), + ('A', 103), + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 101), + ('A', 101), + ('A', 114), + ('A', 120), + ('B', 105), + ('B', 106), + ('B', 106), + ('B', NULL), + ('B', 106), + ('A', 107), + ('B', 108), + ('A', 102), + ('B', 110), + (NULL, NULL), + (NULL, 109), + ('A', 109) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 104), + ('A', 109), + ('A', 109), + ('A', 103), + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 101), + ('A', 101), + ('A', 114), + ('A', 120), + ('B', 105), + ('B', 106), + ('B', 106), + ('B', NULL), + ('B', 106), + ('A', 107), + ('B', 108), + ('A', 102), + ('B', 110), + (NULL, NULL), + (NULL, 109), + ('A', 109) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@topnkey_windowing +POSTHOOK: Lineage: topnkey_windowing.tw_code SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_value SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_code (type: string), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT * +FROM (SELECT `tw_code`, RANK() OVER (PARTITION BY 0 ORDER BY `tw_value` ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING) AS `rank_window_0` +FROM `default`.`topnkey_windowing`) AS `t` +WHERE `rank_window_0` <= 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: 0 (type: int), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + TopN: 4 + TopN Hash Memory Usage: 0.1 + value expressions: tw_code (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: topnkey_windowing + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.topnkey_windowing + name: default.topnkey_windowing + Truncated Path -> Alias: + /topnkey_windowing [$hdt$_0:topnkey_windowing] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + isSamplingPred: false + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +A 1 +A 1 +A 3 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +A 1 +A 1 +A 3 +PREHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_code (type: string), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: dense_rank_window_0 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (dense_rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), dense_rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +NULL 2 +NULL 2 +NULL 2 +A 1 +A 1 +A 2 +A 3 +B 1 +B 2 +B 2 +B 2 +B 3 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +NULL 2 +NULL 2 +NULL 2 +A 1 +A 1 +A 2 +A 3 +B 1 +B 2 +B 2 +B 2 +B 3 +PREHOOK: query: DROP TABLE topnkey_windowing +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@topnkey_windowing +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: DROP TABLE topnkey_windowing +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@topnkey_windowing +POSTHOOK: Output: default@topnkey_windowing diff --git ql/src/test/results/clientpositive/topnkey_windowing_order.q.out ql/src/test/results/clientpositive/topnkey_windowing_order.q.out new file mode 100644 index 0000000000..7a9a67d001 --- /dev/null +++ ql/src/test/results/clientpositive/topnkey_windowing_order.q.out @@ -0,0 +1,517 @@ +PREHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@topnkey_windowing +PREHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 104, 9), + ('A', 'D', 109, 9), + ('A', 'C', 109, 9), + ('A', 'C', 103, 9), + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 101, 9), + ('A', 'D', 101, 9), + ('A', 'D', 114, 9), + ('A', 'D', 120, 9), + ('B', 'E', 105, 9), + ('B', 'E', 106, 9), + ('B', 'E', 106, 9), + ('B', 'E', NULL, NULL), + ('B', 'E', 106, 9), + ('A', 'C', 107, 9), + ('B', 'E', 108, 9), + ('A', 'C', 102, 9), + ('B', 'E', 110, 9), + (NULL, NULL, NULL, NULL), + (NULL, NULL, 109, 9), + ('A', 'D', 109, 9) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 104, 9), + ('A', 'D', 109, 9), + ('A', 'C', 109, 9), + ('A', 'C', 103, 9), + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 101, 9), + ('A', 'D', 101, 9), + ('A', 'D', 114, 9), + ('A', 'D', 120, 9), + ('B', 'E', 105, 9), + ('B', 'E', 106, 9), + ('B', 'E', 106, 9), + ('B', 'E', NULL, NULL), + ('B', 'E', 106, 9), + ('A', 'C', 107, 9), + ('B', 'E', 108, 9), + ('A', 'C', 102, 9), + ('B', 'E', 110, 9), + (NULL, NULL, NULL, NULL), + (NULL, NULL, 109, 9), + ('A', 'D', 109, 9) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@topnkey_windowing +POSTHOOK: Lineage: topnkey_windowing.tw_a SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_b SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_v1 SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_v2 SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_a (type: string), tw_v1 (type: double) + null sort order: aa + sort order: ++ + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 3 +B 3 +B 3 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 3 +B 3 +B 3 +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_a (type: string), tw_v1 (type: double), tw_v2 (type: double) + null sort order: aza + sort order: ++- + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: double) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 26 Data size: 9121 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double, _col3: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST, _col3 DESC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2, _col3 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 9121 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2681 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_a (type: string), tw_b (type: string), tw_v1 (type: double) + null sort order: aaz + sort order: +++ + Map-reduce partition columns: tw_a (type: string), tw_b (type: string) + Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 10892 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col0, _col1 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 10892 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 3220 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 2 +NULL 2 +NULL 2 +NULL 1 +NULL 1 +A 1 +A 2 +A 3 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 2 +NULL 2 +NULL 2 +NULL 1 +NULL 1 +A 1 +A 2 +A 3 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: DROP TABLE topnkey_windowing +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@topnkey_windowing +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: DROP TABLE topnkey_windowing +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@topnkey_windowing +POSTHOOK: Output: default@topnkey_windowing