diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java index b7c1250220..0ccaeea1da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java @@ -28,6 +28,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import java.io.Serializable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import static org.apache.hadoop.hive.ql.plan.api.OperatorType.TOPNKEY; @@ -38,10 +41,13 @@ private static final long serialVersionUID = 1L; - private transient TopNKeyFilter topNKeyFilter; + private transient Map> topNKeyFilters; + private transient KeyWrapper partitionKeyWrapper; private transient KeyWrapper keyWrapper; + private transient KeyWrapperComparator keyWrapperComparator; + /** Kryo ctor. */ public TopNKeyOperator() { super(); @@ -55,38 +61,62 @@ public TopNKeyOperator(CompilationOpContext ctx) { protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); - String columnSortOrder = conf.getColumnSortOrder(); - String nullSortOrder = conf.getNullOrder(); - ObjectInspector rowInspector = inputObjInspectors[0]; outputObjInspector = rowInspector; + int numPartitionKeys = conf.getPartitionKeyColumns().size(); + List keyColumns = conf.getKeyColumns().subList(numPartitionKeys, conf.getKeyColumns().size()); + String columnSortOrder = conf.getColumnSortOrder().substring(numPartitionKeys); + String nullSortOrder = conf.getNullOrder().substring(numPartitionKeys); + // init keyFields - int numKeys = conf.getKeyColumns().size(); - ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[numKeys]; - ObjectInspector[] keyObjectInspectors = new ObjectInspector[numKeys]; - ObjectInspector[] currentKeyObjectInspectors = new ObjectInspector[numKeys]; + ObjectInspector[] keyObjectInspectors = new ObjectInspector[keyColumns.size()]; + ObjectInspector[] currentKeyObjectInspectors = new ObjectInspector[keyColumns.size()]; + keyWrapper = initObjectInspectors(hconf, keyColumns, rowInspector, keyObjectInspectors, currentKeyObjectInspectors); + ObjectInspector[] partitionKeyObjectInspectors = new ObjectInspector[numPartitionKeys]; + ObjectInspector[] partitionCurrentKeyObjectInspectors = new ObjectInspector[numPartitionKeys]; + partitionKeyWrapper = initObjectInspectors(hconf, conf.getPartitionKeyColumns(), rowInspector, + partitionKeyObjectInspectors, partitionCurrentKeyObjectInspectors); + + keyWrapperComparator = new KeyWrapperComparator( + keyObjectInspectors, currentKeyObjectInspectors, columnSortOrder.toString(), nullSortOrder.toString()); + + this.topNKeyFilters = new HashMap<>(); + } - for (int i = 0; i < numKeys; i++) { - ExprNodeDesc key = conf.getKeyColumns().get(i); + private KeyWrapper initObjectInspectors(Configuration hconf, + List keyColumns, + ObjectInspector rowInspector, + ObjectInspector[] keyObjectInspectors, + ObjectInspector[] currentKeyObjectInspectors) throws HiveException { + ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[keyColumns.size()]; + for (int i = 0; i < keyColumns.size(); i++) { + ExprNodeDesc key = keyColumns.get(i); keyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf); keyObjectInspectors[i] = keyFields[i].initialize(rowInspector); currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(keyObjectInspectors[i], - ObjectInspectorUtils.ObjectInspectorCopyOption.WRITABLE); + ObjectInspectorUtils.ObjectInspectorCopyOption.WRITABLE); } - this.topNKeyFilter = new TopNKeyFilter<>(conf.getTopN(), new KeyWrapperComparator( - keyObjectInspectors, currentKeyObjectInspectors, columnSortOrder, nullSortOrder)); - KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors); - keyWrapper = keyWrapperFactory.getKeyWrapper(); + return keyWrapperFactory.getKeyWrapper(); } @Override public void process(Object row, int tag) throws HiveException { + partitionKeyWrapper.getNewKey(row, inputObjInspectors[tag]); + partitionKeyWrapper.setHashKey(); + + TopNKeyFilter topNKeyFilter = topNKeyFilters.get(partitionKeyWrapper); + if (topNKeyFilter == null) { + topNKeyFilter = new TopNKeyFilter<>(conf.getTopN(), keyWrapperComparator); + topNKeyFilters.put(partitionKeyWrapper.copyKey(), topNKeyFilter); + } + keyWrapper.getNewKey(row, inputObjInspectors[tag]); keyWrapper.setHashKey(); + if (topNKeyFilter.canForward(keyWrapper)) { forward(row, outputObjInspector); } @@ -94,7 +124,7 @@ public void process(Object row, int tag) throws HiveException { @Override protected final void closeOp(boolean abort) throws HiveException { - topNKeyFilter.clear(); + topNKeyFilters.clear(); super.closeOp(abort); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java index ce6efa4919..a9ff6b4a83 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java @@ -26,12 +26,14 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.TopNKeyDesc; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.Collections; import java.util.List; import java.util.Stack; @@ -58,20 +60,25 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } - // Currently, per partitioning top n key is not supported - // in TopNKey operator - if (reduceSinkDesc.isPTFReduceSink()) { - return null; - } - // Check whether there already is a top n key operator Operator parentOperator = reduceSinkOperator.getParentOperators().get(0); if (parentOperator instanceof TopNKeyOperator) { return null; } + List partitionCols = Collections.emptyList(); + if (reduceSinkDesc.isPTFReduceSink()) { + // All keys are partition keys or no keys at all + // Note: partition cols are prefix of key cols + if (reduceSinkDesc.getPartitionCols().size() >= reduceSinkDesc.getKeyCols().size()) { + return null; + } + + partitionCols = reduceSinkDesc.getPartitionCols(); + } + TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(), - reduceSinkDesc.getNullOrder(), reduceSinkDesc.getKeyCols()); + reduceSinkDesc.getNullOrder(), reduceSinkDesc.getKeyCols(), partitionCols); copyDown(reduceSinkOperator, topNKeyDesc); return null; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java index 348fbb5faf..d2ae723574 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java @@ -154,13 +154,12 @@ private void pushdownThroughGroupBy(TopNKeyOperator topNKey) throws SemanticExce final TopNKeyDesc topNKeyDesc = topNKey.getConf(); CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc, groupByDesc); - if (commonKeyPrefix.isEmpty()) { + if (commonKeyPrefix.isEmpty() || commonKeyPrefix.size() == topNKeyDesc.getPartitionKeyColumns().size()) { return; } LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), groupBy.getName()); - final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), commonKeyPrefix.getMappedOrder(), - commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns()); + final TopNKeyDesc newTopNKeyDesc = topNKeyDesc.combine(commonKeyPrefix); pushdown(copyDown(groupBy, newTopNKeyDesc)); if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { @@ -184,13 +183,12 @@ private void pushdownThroughReduceSink(TopNKeyOperator topNKey) throws SemanticE final TopNKeyDesc topNKeyDesc = topNKey.getConf(); CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc, reduceSinkDesc); - if (commonKeyPrefix.isEmpty()) { + if (commonKeyPrefix.isEmpty() || commonKeyPrefix.size() == topNKeyDesc.getPartitionKeyColumns().size()) { return; } LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), reduceSink.getName()); - final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), - commonKeyPrefix.getMappedOrder(), commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns()); + final TopNKeyDesc newTopNKeyDesc = topNKeyDesc.combine(commonKeyPrefix); pushdown(copyDown(reduceSink, newTopNKeyDesc)); if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { @@ -242,14 +240,13 @@ private void pushdownThroughLeftOuterJoin(TopNKeyOperator topNKey) throws Semant reduceSinkDesc.getColumnExprMap(), reduceSinkDesc.getOrder(), reduceSinkDesc.getNullOrder()); - if (commonKeyPrefix.isEmpty()) { + if (commonKeyPrefix.isEmpty() || commonKeyPrefix.size() == topNKeyDesc.getPartitionKeyColumns().size()) { return; } LOG.debug("Pushing a copy of {} through {} and {}", topNKey.getName(), join.getName(), reduceSinkOperator.getName()); - final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), - commonKeyPrefix.getMappedOrder(), commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns()); + final TopNKeyDesc newTopNKeyDesc = topNKeyDesc.combine(commonKeyPrefix); pushdown(copyDown(reduceSinkOperator, newTopNKeyDesc)); if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java index a9e2a527df..b8df64d0d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.optimizer.topnkey.CommonKeyPrefix; import org.apache.hadoop.hive.ql.plan.Explain.Level; import java.util.ArrayList; @@ -29,12 +30,14 @@ */ @Explain(displayName = "Top N Key Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class TopNKeyDesc extends AbstractOperatorDesc { + private static final long serialVersionUID = 1L; private int topN; private String columnSortOrder; private String nullOrder; private List keyColumns; + private List partitionKeyColumns; public TopNKeyDesc() { } @@ -43,12 +46,14 @@ public TopNKeyDesc( final int topN, final String columnSortOrder, final String nullOrder, - final List keyColumns) { + final List keyColumns, + final List partitionKeyColumns) { this.topN = topN; this.columnSortOrder = columnSortOrder; this.nullOrder = nullOrder; this.keyColumns = keyColumns; + this.partitionKeyColumns = partitionKeyColumns; } @Explain(displayName = "top n", explainLevels = { Level.DEFAULT, Level.EXTENDED, Level.USER }) @@ -104,6 +109,25 @@ public void setKeyColumns(List keyColumns) { return ret; } + public List getPartitionKeyColumns() { + return partitionKeyColumns; + } + + public void setPartitionKeyColumns(List partitionKeyColumns) { + this.partitionKeyColumns = partitionKeyColumns; + } + + @Explain(displayName = "Map-reduce partition columns") + public String getPartitionKeyString() { + return PlanUtils.getExprListString(partitionKeyColumns); + } + + @Explain(displayName = "PartitionCols", explainLevels = { Level.USER }) + public String getUserLevelExplainPartitionKeyString() { + return PlanUtils.getExprListString(partitionKeyColumns, true); + } + + @Override public boolean isSame(OperatorDesc other) { if (getClass().getName().equals(other.getClass().getName())) { @@ -150,4 +174,11 @@ public TopNKeyDescExplainVectorization getTopNKeyVectorization() { } return new TopNKeyDescExplainVectorization(this, vectorTopNKeyDesc); } + + public TopNKeyDesc combine(CommonKeyPrefix commonKeyPrefix) { + return new TopNKeyDesc(topN, commonKeyPrefix.getMappedOrder(), + commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns(), + commonKeyPrefix.getMappedColumns().subList(0, partitionKeyColumns.size())); + } + } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java new file mode 100644 index 0000000000..90c0175544 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec; + +import static org.hamcrest.Matchers.is; +import static org.junit.Assert.assertThat; + +import java.util.Comparator; +import java.util.Objects; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.junit.Test; + +/** + * Unit test of TopNKeyFilter. + */ +public class TestTopNKeyFilter { + + public static final Comparator TEST_KEY_WRAPPER_COMPARATOR = Comparator.comparingInt(o -> o.keyValue); + + @Test + public void testNothingCanBeForwardedIfTopNIs0() { + TopNKeyFilter topNKeyFilter = new TopNKeyFilter<>(0, TEST_KEY_WRAPPER_COMPARATOR); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(false)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(-1)), is(false)); + } + + @Test + public void testFirstTopNKeysCanBeForwarded() { + TopNKeyFilter topNKeyFilter = new TopNKeyFilter<>(3, TEST_KEY_WRAPPER_COMPARATOR); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(false)); + } + + @Test + public void testKeyCanNotBeForwardedIfItIsDroppedOutFromTopNKeys() { + TopNKeyFilter topNKeyFilter = new TopNKeyFilter<>(2, TEST_KEY_WRAPPER_COMPARATOR); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(2)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(false)); + } + + @Test + public void testMembersOfTopNKeysStillCanBeForwardedAfterNonTopNKeysTried() { + TopNKeyFilter topNKeyFilter = new TopNKeyFilter<>(2, TEST_KEY_WRAPPER_COMPARATOR); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(false)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true)); + } + + /** + * Test implementation of KeyWrapper. + */ + private static class TestKeyWrapper extends KeyWrapper { + + private final int keyValue; + + public TestKeyWrapper(int keyValue) { + this.keyValue = keyValue; + } + + @Override + public void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException { + + } + + @Override + public void setHashKey() { + + } + + @Override + public KeyWrapper copyKey() { + return new TestKeyWrapper(this.keyValue); + } + + @Override + public void copyKey(KeyWrapper oldWrapper) { + + } + + @Override + public Object[] getKeyArray() { + return new Object[0]; + } + + @Override + public boolean isCopy() { + return false; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TestKeyWrapper that = (TestKeyWrapper) o; + return keyValue == that.keyValue; + } + + @Override + public int hashCode() { + return Objects.hash(keyValue); + } + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java index 51d2382278..a3e43ceb38 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java @@ -182,4 +182,4 @@ public void testmapWhenKeyCountsMismatch() { assertThat(commonPrefix.getMappedOrder(), is("+")); assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0)); } -} +} \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_in.q ql/src/test/queries/clientpositive/subquery_in.q index a5b3ce7951..96ed1bae41 100644 --- ql/src/test/queries/clientpositive/subquery_in.q +++ ql/src/test/queries/clientpositive/subquery_in.q @@ -3,6 +3,7 @@ --! qt:dataset:lineitem set hive.mapred.mode=nonstrict; set hive.explain.user=false; +set hive.optimize.topnkey=false; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/subquery_notin.q ql/src/test/queries/clientpositive/subquery_notin.q index f8636453c2..f25168ab77 100644 --- ql/src/test/queries/clientpositive/subquery_notin.q +++ ql/src/test/queries/clientpositive/subquery_notin.q @@ -2,6 +2,7 @@ --! qt:dataset:part --! qt:dataset:lineitem set hive.mapred.mode=nonstrict; +set hive.optimize.topnkey=false; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/topnkey_windowing.q ql/src/test/queries/clientpositive/topnkey_windowing.q new file mode 100644 index 0000000000..a5352d2d6c --- /dev/null +++ ql/src/test/queries/clientpositive/topnkey_windowing.q @@ -0,0 +1,110 @@ +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=1431655765; +SET hive.vectorized.execution.enabled=false; + + +CREATE TABLE topnkey_windowing (tw_code string, tw_value double); +INSERT INTO topnkey_windowing VALUES + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 104), + ('A', 109), + ('A', 109), + ('A', 103), + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 101), + ('A', 101), + ('A', 114), + ('A', 120), + ('B', 105), + ('B', 106), + ('B', 106), + ('B', NULL), + ('B', 106), + ('A', 107), + ('B', 108), + ('A', 102), + ('B', 110), + (NULL, NULL), + (NULL, 109), + ('A', 109); + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + + +SET hive.optimize.topnkey=true; +EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + + + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +DROP TABLE topnkey_windowing; diff --git ql/src/test/queries/clientpositive/topnkey_windowing_order.q ql/src/test/queries/clientpositive/topnkey_windowing_order.q new file mode 100644 index 0000000000..686d72a3cd --- /dev/null +++ ql/src/test/queries/clientpositive/topnkey_windowing_order.q @@ -0,0 +1,109 @@ +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=1431655765; +SET hive.vectorized.execution.enabled=false; + + +CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double); +INSERT INTO topnkey_windowing VALUES + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 104, 9), + ('A', 'D', 109, 9), + ('A', 'C', 109, 9), + ('A', 'C', 103, 9), + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 101, 9), + ('A', 'D', 101, 9), + ('A', 'D', 114, 9), + ('A', 'D', 120, 9), + ('B', 'E', 105, 9), + ('B', 'E', 106, 9), + ('B', 'E', 106, 9), + ('B', 'E', NULL, NULL), + ('B', 'E', 106, 9), + ('A', 'C', 107, 9), + ('B', 'E', 108, 9), + ('A', 'C', 102, 9), + ('B', 'E', 110, 9), + (NULL, NULL, NULL, NULL), + (NULL, NULL, 109, 9), + ('A', 'D', 109, 9); + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.optimize.topnkey=false; +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +DROP TABLE topnkey_windowing; diff --git ql/src/test/queries/clientpositive/vector_windowing_streaming.q ql/src/test/queries/clientpositive/vector_windowing_streaming.q index e1011f9949..2f7b628db3 100644 --- ql/src/test/queries/clientpositive/vector_windowing_streaming.q +++ ql/src/test/queries/clientpositive/vector_windowing_streaming.q @@ -5,6 +5,7 @@ SET hive.vectorized.execution.enabled=true; SET hive.vectorized.execution.reduce.enabled=true; set hive.vectorized.execution.ptf.enabled=true; set hive.fetch.task.conversion=none; +set hive.optimize.topnkey=false; drop table over10k_n8; diff --git ql/src/test/queries/clientpositive/windowing_filter.q ql/src/test/queries/clientpositive/windowing_filter.q index 2483c18416..14d0c5a7c8 100644 --- ql/src/test/queries/clientpositive/windowing_filter.q +++ ql/src/test/queries/clientpositive/windowing_filter.q @@ -1,5 +1,6 @@ set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=1431655765; +set hive.optimize.topnkey=false; create table testtable_n1000 (s_state string, ss_net_profit double); diff --git ql/src/test/results/clientpositive/llap/subquery_in.q.out ql/src/test/results/clientpositive/llap/subquery_in.q.out index 24b16598e2..479e60fda9 100644 --- ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -3434,19 +3434,13 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col3 (type: string) + Reduce Output Operator + key expressions: _col3 (type: string) null sort order: z + sort order: + Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE - top n: 4 - Reduce Output Operator - key expressions: _col3 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 6751eda7d3..3dc247591c 100644 --- ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -4512,7 +4512,7 @@ POSTHOOK: Input: default@part 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand, p_partkey limit 4 PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4615,23 +4615,17 @@ STAGE PLANS: Filter Operator predicate: ((_col12 is null or (_col9 = 0L)) and ((_col10 >= _col9) or (_col9 = 0L) or _col12 is not null or _col5 is null) and (_col5 is not null or (_col9 = 0L) or _col12 is not null)) (type: boolean) Statistics: Num rows: 33 Data size: 20987 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col3 (type: string), _col0 (type: int) - null sort order: zz - Statistics: Num rows: 33 Data size: 20987 Basic stats: COMPLETE Column stats: COMPLETE - top n: 4 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string), _col0 (type: int) + null sort order: zz + sort order: ++ Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: string), _col0 (type: int) - null sort order: zz - sort order: ++ - Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: @@ -4727,7 +4721,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand, p_partkey limit 4 PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_functions.q.out ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_functions.q.out index 0ec6e6de43..19f57ab4fa 100644 --- ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_functions.q.out +++ ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_functions.q.out @@ -413,8 +413,8 @@ NULL 1 2 5 2 3 NULL 2 3 6 2 1 -NULL 8 4 7 8 4 +NULL 8 4 PREHOOK: query: SELECT a, b, max(c) FROM t_test_grouping_sets GROUP BY a,b GROUPING SETS ((a,b), (a), (b), ()) ORDER BY b LIMIT 7 PREHOOK: type: QUERY PREHOOK: Input: default@t_test_grouping_sets @@ -428,8 +428,8 @@ NULL 1 2 5 2 3 NULL 2 3 6 2 1 -7 8 4 NULL 8 4 +7 8 4 PREHOOK: query: DROP TABLE IF EXISTS t_test_grouping_sets PREHOOK: type: DROPTABLE PREHOOK: Input: default@t_test_grouping_sets diff --git ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_order.q.out ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_order.q.out index d3ff1a6256..7ee27dc7b7 100644 --- ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_order.q.out +++ ql/src/test/results/clientpositive/llap/topnkey_grouping_sets_order.q.out @@ -285,8 +285,8 @@ NULL 1 5 2 NULL 2 6 2 -NULL 8 7 8 +NULL 8 PREHOOK: query: EXPLAIN SELECT a, b FROM t_test_grouping_sets GROUP BY a,b GROUPING SETS ((a,b), (a), (b), ()) ORDER BY a DESC, b ASC LIMIT 7 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out new file mode 100644 index 0000000000..26122a625d --- /dev/null +++ ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out @@ -0,0 +1,627 @@ +PREHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@topnkey_windowing +PREHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 104), + ('A', 109), + ('A', 109), + ('A', 103), + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 101), + ('A', 101), + ('A', 114), + ('A', 120), + ('B', 105), + ('B', 106), + ('B', 106), + ('B', NULL), + ('B', 106), + ('A', 107), + ('B', 108), + ('A', 102), + ('B', 110), + (NULL, NULL), + (NULL, 109), + ('A', 109) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 104), + ('A', 109), + ('A', 109), + ('A', 103), + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 101), + ('A', 101), + ('A', 114), + ('A', 120), + ('B', 105), + ('B', 106), + ('B', 106), + ('B', NULL), + ('B', 106), + ('A', 107), + ('B', 108), + ('A', 102), + ('B', 110), + (NULL, NULL), + (NULL, 109), + ('A', 109) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@topnkey_windowing +POSTHOOK: Lineage: topnkey_windowing.tw_code SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_value SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: tw_code (type: string), tw_value (type: double) + null sort order: az + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_code (type: string), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT * +FROM (SELECT `tw_code`, RANK() OVER (PARTITION BY 0 ORDER BY `tw_value` ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING) AS `rank_window_0` +FROM `default`.`topnkey_windowing`) AS `t` +WHERE `rank_window_0` <= 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Top N Key Operator + sort order: ++ + keys: 0 (type: int), tw_value (type: double) + null sort order: az + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: 0 (type: int), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + TopN: 4 + TopN Hash Memory Usage: 0.1 + value expressions: tw_code (type: string) + auto parallelism: true + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: topnkey_windowing + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.topnkey_windowing + name: default.topnkey_windowing + Truncated Path -> Alias: + /topnkey_windowing [topnkey_windowing] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + isSamplingPred: false + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +A 1 +A 1 +A 3 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +A 1 +A 1 +A 3 +PREHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: tw_code (type: string), tw_value (type: double) + null sort order: az + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_code (type: string), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: dense_rank_window_0 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (dense_rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), dense_rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +NULL 2 +NULL 2 +NULL 2 +A 1 +A 1 +A 2 +A 3 +B 1 +B 2 +B 2 +B 2 +B 3 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +NULL 2 +NULL 2 +NULL 2 +A 1 +A 1 +A 2 +A 3 +B 1 +B 2 +B 2 +B 2 +B 3 +PREHOOK: query: DROP TABLE topnkey_windowing +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@topnkey_windowing +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: DROP TABLE topnkey_windowing +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@topnkey_windowing +POSTHOOK: Output: default@topnkey_windowing diff --git ql/src/test/results/clientpositive/llap/topnkey_windowing_order.q.out ql/src/test/results/clientpositive/llap/topnkey_windowing_order.q.out new file mode 100644 index 0000000000..9721705b26 --- /dev/null +++ ql/src/test/results/clientpositive/llap/topnkey_windowing_order.q.out @@ -0,0 +1,568 @@ +PREHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@topnkey_windowing +PREHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 104, 9), + ('A', 'D', 109, 9), + ('A', 'C', 109, 9), + ('A', 'C', 103, 9), + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 101, 9), + ('A', 'D', 101, 9), + ('A', 'D', 114, 9), + ('A', 'D', 120, 9), + ('B', 'E', 105, 9), + ('B', 'E', 106, 9), + ('B', 'E', 106, 9), + ('B', 'E', NULL, NULL), + ('B', 'E', 106, 9), + ('A', 'C', 107, 9), + ('B', 'E', 108, 9), + ('A', 'C', 102, 9), + ('B', 'E', 110, 9), + (NULL, NULL, NULL, NULL), + (NULL, NULL, 109, 9), + ('A', 'D', 109, 9) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 104, 9), + ('A', 'D', 109, 9), + ('A', 'C', 109, 9), + ('A', 'C', 103, 9), + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 101, 9), + ('A', 'D', 101, 9), + ('A', 'D', 114, 9), + ('A', 'D', 120, 9), + ('B', 'E', 105, 9), + ('B', 'E', 106, 9), + ('B', 'E', 106, 9), + ('B', 'E', NULL, NULL), + ('B', 'E', 106, 9), + ('A', 'C', 107, 9), + ('B', 'E', 108, 9), + ('A', 'C', 102, 9), + ('B', 'E', 110, 9), + (NULL, NULL, NULL, NULL), + (NULL, NULL, 109, 9), + ('A', 'D', 109, 9) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@topnkey_windowing +POSTHOOK: Lineage: topnkey_windowing.tw_a SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_b SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_v1 SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_v2 SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: tw_a (type: string), tw_v1 (type: double) + null sort order: aa + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_a (type: string), tw_v1 (type: double) + null sort order: aa + sort order: ++ + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 3 +B 3 +B 3 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 3 +B 3 +B 3 +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++- + keys: tw_a (type: string), tw_v1 (type: double), tw_v2 (type: double) + null sort order: aza + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_a (type: string), tw_v1 (type: double), tw_v2 (type: double) + null sort order: aza + sort order: ++- + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: double) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 26 Data size: 8547 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double, _col3: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST, _col3 DESC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2, _col3 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8547 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2378 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: +++ + keys: tw_a (type: string), tw_b (type: string), tw_v1 (type: double) + null sort order: aaz + Map-reduce partition columns: tw_a (type: string), tw_b (type: string) + Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_a (type: string), tw_b (type: string), tw_v1 (type: double) + null sort order: aaz + sort order: +++ + Map-reduce partition columns: tw_a (type: string), tw_b (type: string) + Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 10010 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col0, _col1 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 10010 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 2 +NULL 2 +NULL 2 +NULL 1 +NULL 1 +A 1 +A 2 +A 3 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 2 +NULL 2 +NULL 2 +NULL 1 +NULL 1 +A 1 +A 2 +A 3 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: DROP TABLE topnkey_windowing +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@topnkey_windowing +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: DROP TABLE topnkey_windowing +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@topnkey_windowing +POSTHOOK: Output: default@topnkey_windowing diff --git ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out index 786bbe12c8..c1340962fb 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out @@ -19,7 +19,7 @@ POSTHOOK: type: CREATE_MATERIALIZED_VIEW POSTHOOK: Input: default@store_sales POSTHOOK: Output: database:default POSTHOOK: Output: default@mv_store_sales_item_customer -Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -112,10 +112,10 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_147] - Limit [LIM_146] (rows=100 width=218) + File Output Operator [FS_149] + Limit [LIM_148] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_145] (rows=6951 width=218) + Select Operator [SEL_147] (rows=6951 width=218) Output:["_col0","_col1","_col2"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_70] @@ -123,100 +123,104 @@ Stage-0 Output:["_col0","_col1","_col2"] Top N Key Operator [TNK_99] (rows=6951 width=218) keys:_col1,top n:100 - Merge Join Operator [MERGEJOIN_114] (rows=6951 width=218) - Conds:RS_66._col2=RS_144._col0(Inner),Output:["_col1","_col5","_col7"] + Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218) + Conds:RS_66._col2=RS_146._col0(Inner),Output:["_col1","_col5","_col7"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_142] (rows=462000 width=111) + Select Operator [SEL_144] (rows=462000 width=111) Output:["_col0","_col1"] TableScan [TS_56] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_113] (rows=6951 width=115) - Conds:RS_63._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col5"] + Merge Join Operator [MERGEJOIN_115] (rows=6951 width=115) + Conds:RS_63._col0=RS_145._col0(Inner),Output:["_col1","_col2","_col5"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] + SHUFFLE [RS_145] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_142] + Please refer to the previous Select Operator [SEL_144] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_112] (rows=6951 width=12) - Conds:RS_136._col1=RS_141._col1(Inner),Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_114] (rows=6951 width=12) + Conds:RS_138._col1=RS_143._col1(Inner),Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] + SHUFFLE [RS_138] PartitionCols:_col1 - Select Operator [SEL_135] (rows=6951 width=8) + Select Operator [SEL_137] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_134] (rows=6951 width=116) + Filter Operator [FIL_136] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_133] (rows=20854 width=116) + PTF Operator [PTF_135] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_132] (rows=20854 width=116) + Select Operator [SEL_134] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:0 - Filter Operator [FIL_20] (rows=20854 width=228) - predicate:(_col1 > (0.9 * _col2)) - Merge Join Operator [MERGEJOIN_110] (rows=62562 width=228) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_131] - Select Operator [SEL_130] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_129] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_128] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_127] (rows=1 width=124) + Top N Key Operator [TNK_100] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Filter Operator [FIL_20] (rows=20854 width=228) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_133] + Select Operator [SEL_132] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_131] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_130] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_129] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Group By Operator [GBY_127] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_126] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_125] (rows=287946 width=114) + predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) + TableScan [TS_8] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_124] + Select Operator [SEL_123] (rows=62562 width=116) + Output:["_col0","_col1"] + Filter Operator [FIL_122] (rows=62562 width=124) + predicate:(_col1 is not null and _col2 is not null) + Group By Operator [GBY_121] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] PartitionCols:_col0 - Group By Operator [GBY_125] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_124] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_123] (rows=287946 width=114) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_8] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_122] - Select Operator [SEL_121] (rows=62562 width=116) - Output:["_col0","_col1"] - Filter Operator [FIL_120] (rows=62562 width=124) - predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_119] (rows=62562 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] - PartitionCols:_col0 - Group By Operator [GBY_117] (rows=3199976 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_116] (rows=6399952 width=114) - Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_115] (rows=6399952 width=114) - predicate:(ss_store_sk = 410) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] + Group By Operator [GBY_119] (rows=3199976 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk + Select Operator [SEL_118] (rows=6399952 width=114) + Output:["ss_item_sk","ss_net_profit"] + Filter Operator [FIL_117] (rows=6399952 width=114) + predicate:(ss_store_sk = 410) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_143] PartitionCols:_col1 - Select Operator [SEL_140] (rows=6951 width=8) + Select Operator [SEL_142] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_139] (rows=6951 width=116) + Filter Operator [FIL_141] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_138] (rows=20854 width=116) + PTF Operator [PTF_140] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_137] (rows=20854 width=116) + Select Operator [SEL_139] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_20] + Top N Key Operator [TNK_101] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Please refer to the previous Filter Operator [FIL_20] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out index eace7a0685..8a201c2ef5 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -91,10 +91,10 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_147] - Limit [LIM_146] (rows=100 width=218) + File Output Operator [FS_149] + Limit [LIM_148] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_145] (rows=6951 width=218) + Select Operator [SEL_147] (rows=6951 width=218) Output:["_col0","_col1","_col2"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_70] @@ -102,100 +102,104 @@ Stage-0 Output:["_col0","_col1","_col2"] Top N Key Operator [TNK_99] (rows=6951 width=218) keys:_col1,top n:100 - Merge Join Operator [MERGEJOIN_114] (rows=6951 width=218) - Conds:RS_66._col2=RS_144._col0(Inner),Output:["_col1","_col5","_col7"] + Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218) + Conds:RS_66._col2=RS_146._col0(Inner),Output:["_col1","_col5","_col7"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_142] (rows=462000 width=111) + Select Operator [SEL_144] (rows=462000 width=111) Output:["_col0","_col1"] TableScan [TS_56] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_113] (rows=6951 width=115) - Conds:RS_63._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col5"] + Merge Join Operator [MERGEJOIN_115] (rows=6951 width=115) + Conds:RS_63._col0=RS_145._col0(Inner),Output:["_col1","_col2","_col5"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] + SHUFFLE [RS_145] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_142] + Please refer to the previous Select Operator [SEL_144] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_112] (rows=6951 width=12) - Conds:RS_136._col1=RS_141._col1(Inner),Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_114] (rows=6951 width=12) + Conds:RS_138._col1=RS_143._col1(Inner),Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] + SHUFFLE [RS_138] PartitionCols:_col1 - Select Operator [SEL_135] (rows=6951 width=8) + Select Operator [SEL_137] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_134] (rows=6951 width=116) + Filter Operator [FIL_136] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_133] (rows=20854 width=116) + PTF Operator [PTF_135] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_132] (rows=20854 width=116) + Select Operator [SEL_134] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:0 - Filter Operator [FIL_20] (rows=20854 width=228) - predicate:(_col1 > (0.9 * _col2)) - Merge Join Operator [MERGEJOIN_110] (rows=62562 width=228) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_131] - Select Operator [SEL_130] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_129] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_128] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_127] (rows=1 width=124) + Top N Key Operator [TNK_100] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Filter Operator [FIL_20] (rows=20854 width=228) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_133] + Select Operator [SEL_132] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_131] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_130] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_129] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Group By Operator [GBY_127] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_126] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_125] (rows=287946 width=114) + predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) + TableScan [TS_8] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_124] + Select Operator [SEL_123] (rows=62562 width=116) + Output:["_col0","_col1"] + Filter Operator [FIL_122] (rows=62562 width=124) + predicate:(_col1 is not null and _col2 is not null) + Group By Operator [GBY_121] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] PartitionCols:_col0 - Group By Operator [GBY_125] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_124] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_123] (rows=287946 width=114) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_8] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_122] - Select Operator [SEL_121] (rows=62562 width=116) - Output:["_col0","_col1"] - Filter Operator [FIL_120] (rows=62562 width=124) - predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_119] (rows=62562 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] - PartitionCols:_col0 - Group By Operator [GBY_117] (rows=3199976 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_116] (rows=6399952 width=114) - Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_115] (rows=6399952 width=114) - predicate:(ss_store_sk = 410) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] + Group By Operator [GBY_119] (rows=3199976 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk + Select Operator [SEL_118] (rows=6399952 width=114) + Output:["ss_item_sk","ss_net_profit"] + Filter Operator [FIL_117] (rows=6399952 width=114) + predicate:(ss_store_sk = 410) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_143] PartitionCols:_col1 - Select Operator [SEL_140] (rows=6951 width=8) + Select Operator [SEL_142] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_139] (rows=6951 width=116) + Filter Operator [FIL_141] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_138] (rows=20854 width=116) + PTF Operator [PTF_140] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_137] (rows=20854 width=116) + Select Operator [SEL_139] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_20] + Top N Key Operator [TNK_101] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Please refer to the previous Filter Operator [FIL_20] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out index 37fc51697b..91a900ed61 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out @@ -299,46 +299,46 @@ Stage-0 limit:100 Stage-1 Reducer 11 vectorized - File Output Operator [FS_315] - Limit [LIM_314] (rows=100 width=215) + File Output Operator [FS_313] + Limit [LIM_312] (rows=100 width=215) Number of rows:100 - Select Operator [SEL_313] (rows=40436 width=215) + Select Operator [SEL_311] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] - Select Operator [SEL_311] (rows=40436 width=215) + SHUFFLE [RS_310] + Select Operator [SEL_309] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_310] (rows=40436 width=215) + Group By Operator [GBY_308] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 9 [SIMPLE_EDGE] <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_356] + Reduce Output Operator [RS_354] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_355] (rows=40436 width=215) + Group By Operator [GBY_353] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_354] (rows=40436 width=214) - keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_353] (rows=14232 width=213) + Top N Key Operator [TNK_352] (rows=40436 width=214) + keys:_col0, _col3, _col4,top n:100 + Select Operator [SEL_351] (rows=14232 width=213) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_352] (rows=14232 width=248) + Filter Operator [FIL_350] (rows=14232 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_351] (rows=21349 width=248) + PTF Operator [PTF_349] (rows=21349 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_350] (rows=21349 width=248) + Select Operator [SEL_348] (rows=21349 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] + SHUFFLE [RS_347] PartitionCols:0 - Select Operator [SEL_348] (rows=21349 width=244) + Select Operator [SEL_346] (rows=21349 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_347] (rows=21349 width=244) + PTF Operator [PTF_345] (rows=21349 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_346] (rows=21349 width=244) + Select Operator [SEL_344] (rows=21349 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] + SHUFFLE [RS_343] PartitionCols:0 - Group By Operator [GBY_344] (rows=21349 width=244) + Group By Operator [GBY_342] (rows=21349 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_89] @@ -347,89 +347,89 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 Select Operator [SEL_86] (rows=20856667 width=216) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_242] (rows=20856667 width=216) - Conds:RS_83._col1, _col2=RS_343._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] + Merge Join Operator [MERGEJOIN_240] (rows=20856667 width=216) + Conds:RS_83._col1, _col2=RS_341._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_343] + SHUFFLE [RS_341] PartitionCols:_col0, _col1 - Select Operator [SEL_342] (rows=19197050 width=119) + Select Operator [SEL_340] (rows=19197050 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_341] (rows=19197050 width=119) + Filter Operator [FIL_339] (rows=19197050 width=119) predicate:(sr_return_amt > 10000) TableScan [TS_77] (rows=57591150 width=119) default@store_returns,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_83] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_241] (rows=61119617 width=118) - Conds:RS_340._col0=RS_277._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_239] (rows=61119617 width=118) + Conds:RS_338._col0=RS_275._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_277] + PARTITION_ONLY_SHUFFLE [RS_275] PartitionCols:_col0 - Select Operator [SEL_272] (rows=50 width=4) + Select Operator [SEL_270] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_271] (rows=50 width=12) + Filter Operator [FIL_269] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 12)) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_340] + SHUFFLE [RS_338] PartitionCols:_col0 - Select Operator [SEL_339] (rows=61119617 width=229) + Select Operator [SEL_337] (rows=61119617 width=229) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_338] (rows=61119617 width=229) + Filter Operator [FIL_336] (rows=61119617 width=229) predicate:((ss_net_profit > 1) and (ss_net_paid > 0) and (ss_quantity > 0) and ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_81_date_dim_d_date_sk_min) AND DynamicValue(RS_81_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_81_date_dim_d_date_sk_bloom_filter))) TableScan [TS_71] (rows=575995635 width=229) default@store_sales,sts,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_net_paid","ss_net_profit"] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_337] - Group By Operator [GBY_336] (rows=1 width=12) + BROADCAST [RS_335] + Group By Operator [GBY_334] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_284] - Group By Operator [GBY_281] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_282] + Group By Operator [GBY_279] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_278] (rows=50 width=4) + Select Operator [SEL_276] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_272] + Please refer to the previous Select Operator [SEL_270] <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_309] + Reduce Output Operator [RS_307] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_308] (rows=40436 width=215) + Group By Operator [GBY_306] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_307] (rows=40436 width=214) - keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_306] (rows=26204 width=215) + Top N Key Operator [TNK_305] (rows=40436 width=214) + keys:_col0, _col3, _col4,top n:100 + Select Operator [SEL_304] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_305] (rows=26204 width=215) + Group By Operator [GBY_303] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 7 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_335] + Reduce Output Operator [RS_333] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_334] (rows=26204 width=215) + Group By Operator [GBY_332] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_333] (rows=12574 width=215) + Select Operator [SEL_331] (rows=12574 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_332] (rows=12574 width=248) + Filter Operator [FIL_330] (rows=12574 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_331] (rows=18863 width=248) + PTF Operator [PTF_329] (rows=18863 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_330] (rows=18863 width=248) + Select Operator [SEL_328] (rows=18863 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_329] + SHUFFLE [RS_327] PartitionCols:0 - Select Operator [SEL_328] (rows=18863 width=244) + Select Operator [SEL_326] (rows=18863 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_327] (rows=18863 width=244) + PTF Operator [PTF_325] (rows=18863 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_326] (rows=18863 width=244) + Select Operator [SEL_324] (rows=18863 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] + SHUFFLE [RS_323] PartitionCols:0 - Group By Operator [GBY_324] (rows=18863 width=244) + Group By Operator [GBY_322] (rows=18863 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_50] @@ -438,72 +438,72 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 Select Operator [SEL_47] (rows=9599627 width=231) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_240] (rows=9599627 width=231) - Conds:RS_44._col1, _col2=RS_323._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] + Merge Join Operator [MERGEJOIN_238] (rows=9599627 width=231) + Conds:RS_44._col1, _col2=RS_321._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] + SHUFFLE [RS_321] PartitionCols:_col0, _col1 - Select Operator [SEL_322] (rows=9599627 width=121) + Select Operator [SEL_320] (rows=9599627 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_321] (rows=9599627 width=121) + Filter Operator [FIL_319] (rows=9599627 width=121) predicate:(cr_return_amount > 10000) TableScan [TS_38] (rows=28798881 width=121) default@catalog_returns,cr,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_239] (rows=31838858 width=123) - Conds:RS_320._col0=RS_275._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_237] (rows=31838858 width=123) + Conds:RS_318._col0=RS_273._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_275] + PARTITION_ONLY_SHUFFLE [RS_273] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_272] + Please refer to the previous Select Operator [SEL_270] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + SHUFFLE [RS_318] PartitionCols:_col0 - Select Operator [SEL_319] (rows=31838858 width=239) + Select Operator [SEL_317] (rows=31838858 width=239) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_318] (rows=31838858 width=239) + Filter Operator [FIL_316] (rows=31838858 width=239) predicate:((cs_net_profit > 1) and (cs_net_paid > 0) and (cs_quantity > 0) and cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_42_date_dim_d_date_sk_min) AND DynamicValue(RS_42_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_date_dim_d_date_sk_bloom_filter))) TableScan [TS_32] (rows=287989836 width=239) default@catalog_sales,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_net_paid","cs_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_317] - Group By Operator [GBY_316] (rows=1 width=12) + BROADCAST [RS_315] + Group By Operator [GBY_314] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_283] - Group By Operator [GBY_280] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_281] + Group By Operator [GBY_278] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_276] (rows=50 width=4) + Select Operator [SEL_274] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_272] + Please refer to the previous Select Operator [SEL_270] <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_304] + Reduce Output Operator [RS_302] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_303] (rows=26204 width=215) + Group By Operator [GBY_301] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_302] (rows=13630 width=211) + Select Operator [SEL_300] (rows=13630 width=211) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_301] (rows=13630 width=248) + Filter Operator [FIL_299] (rows=13630 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_300] (rows=20445 width=248) + PTF Operator [PTF_298] (rows=20445 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_299] (rows=20445 width=248) + Select Operator [SEL_297] (rows=20445 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + SHUFFLE [RS_296] PartitionCols:0 - Select Operator [SEL_297] (rows=20445 width=244) + Select Operator [SEL_295] (rows=20445 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_296] (rows=20445 width=244) + PTF Operator [PTF_294] (rows=20445 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_295] (rows=20445 width=244) + Select Operator [SEL_293] (rows=20445 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_294] + SHUFFLE [RS_292] PartitionCols:0 - Group By Operator [GBY_293] (rows=20445 width=244) + Group By Operator [GBY_291] (rows=20445 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] @@ -512,44 +512,44 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 Select Operator [SEL_15] (rows=5227456 width=231) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_238] (rows=5227456 width=231) - Conds:RS_12._col1, _col2=RS_292._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] + Merge Join Operator [MERGEJOIN_236] (rows=5227456 width=231) + Conds:RS_12._col1, _col2=RS_290._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_292] + SHUFFLE [RS_290] PartitionCols:_col0, _col1 - Select Operator [SEL_291] (rows=4799489 width=118) + Select Operator [SEL_289] (rows=4799489 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_290] (rows=4799489 width=118) + Filter Operator [FIL_288] (rows=4799489 width=118) predicate:(wr_return_amt > 10000) TableScan [TS_6] (rows=14398467 width=118) default@web_returns,wr,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_237] (rows=15996318 width=123) - Conds:RS_289._col0=RS_273._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_235] (rows=15996318 width=123) + Conds:RS_287._col0=RS_271._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_273] + PARTITION_ONLY_SHUFFLE [RS_271] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_272] + Please refer to the previous Select Operator [SEL_270] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + SHUFFLE [RS_287] PartitionCols:_col0 - Select Operator [SEL_288] (rows=15996318 width=239) + Select Operator [SEL_286] (rows=15996318 width=239) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_287] (rows=15996318 width=239) + Filter Operator [FIL_285] (rows=15996318 width=239) predicate:((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=144002668 width=239) default@web_sales,ws,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_net_paid","ws_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_286] - Group By Operator [GBY_285] (rows=1 width=12) + BROADCAST [RS_284] + Group By Operator [GBY_283] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_282] - Group By Operator [GBY_279] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_280] + Group By Operator [GBY_277] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_274] (rows=50 width=4) + Select Operator [SEL_272] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_272] + Please refer to the previous Select Operator [SEL_270] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out index 9ed7306c46..f5fe4b0ff3 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out @@ -111,88 +111,90 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_109] - Limit [LIM_108] (rows=100 width=617) + File Output Operator [FS_111] + Limit [LIM_110] (rows=100 width=617) Number of rows:100 - Select Operator [SEL_107] (rows=1575989691 width=617) + Select Operator [SEL_109] (rows=1575989691 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_106] - Select Operator [SEL_105] (rows=1575989691 width=617) + SHUFFLE [RS_108] + Select Operator [SEL_107] (rows=1575989691 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Top N Key Operator [TNK_104] (rows=1575989691 width=613) + Top N Key Operator [TNK_106] (rows=1575989691 width=613) keys:_col6, _col5, _col4, _col7, _col0, _col2, _col1, _col3, _col8, rank_window_0,top n:100 - Filter Operator [FIL_103] (rows=1575989691 width=613) + Filter Operator [FIL_105] (rows=1575989691 width=613) predicate:(rank_window_0 <= 100) - PTF Operator [PTF_102] (rows=4727969073 width=613) + PTF Operator [PTF_104] (rows=4727969073 width=613) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col8 DESC NULLS LAST","partition by:":"_col6"}] - Select Operator [SEL_101] (rows=4727969073 width=613) + Select Operator [SEL_103] (rows=4727969073 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_100] + SHUFFLE [RS_102] PartitionCols:_col6 - Select Operator [SEL_99] (rows=4727969073 width=613) + Select Operator [SEL_101] (rows=4727969073 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_98] (rows=4727969073 width=621) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Group By Operator [GBY_20] (rows=4727969073 width=621) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col9, _col11, _col12, _col13, _col14, 0L - Merge Join Operator [MERGEJOIN_82] (rows=525329897 width=613) - Conds:RS_16._col1=RS_97._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_97] - PartitionCols:_col0 - Select Operator [SEL_96] (rows=462000 width=393) - Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_8] (rows=462000 width=393) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_81] (rows=525329897 width=228) - Conds:RS_13._col2=RS_95._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_95] - PartitionCols:_col0 - Select Operator [SEL_94] (rows=1704 width=104) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=1704 width=104) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_80] (rows=525329897 width=131) - Conds:RS_93._col0=RS_85._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_85] - PartitionCols:_col0 - Select Operator [SEL_84] (rows=317 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_83] (rows=317 width=20) - predicate:d_month_seq BETWEEN 1212 AND 1223 - TableScan [TS_3] (rows=73049 width=20) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_93] - PartitionCols:_col0 - Select Operator [SEL_92] (rows=525329897 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_91] (rows=525329897 width=122) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_11_date_dim_d_date_sk_min) AND DynamicValue(RS_11_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_11_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_90] - Group By Operator [GBY_89] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_88] - Group By Operator [GBY_87] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_86] (rows=317 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_84] + Top N Key Operator [TNK_100] (rows=4727969073 width=621) + PartitionCols:_col6,keys:_col6, _col9,top n:101 + Group By Operator [GBY_99] (rows=4727969073 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Group By Operator [GBY_20] (rows=4727969073 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col9, _col11, _col12, _col13, _col14, 0L + Merge Join Operator [MERGEJOIN_83] (rows=525329897 width=613) + Conds:RS_16._col1=RS_98._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_98] + PartitionCols:_col0 + Select Operator [SEL_97] (rows=462000 width=393) + Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_8] (rows=462000 width=393) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_82] (rows=525329897 width=228) + Conds:RS_13._col2=RS_96._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_96] + PartitionCols:_col0 + Select Operator [SEL_95] (rows=1704 width=104) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_81] (rows=525329897 width=131) + Conds:RS_94._col0=RS_86._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_86] + PartitionCols:_col0 + Select Operator [SEL_85] (rows=317 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_84] (rows=317 width=20) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_3] (rows=73049 width=20) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_94] + PartitionCols:_col0 + Select Operator [SEL_93] (rows=525329897 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_92] (rows=525329897 width=122) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_11_date_dim_d_date_sk_min) AND DynamicValue(RS_11_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_11_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_91] + Group By Operator [GBY_90] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_89] + Group By Operator [GBY_88] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_87] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_85] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out index 40dfaa2f13..b2bde8d48c 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out @@ -100,27 +100,27 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_172] - Limit [LIM_171] (rows=100 width=492) + File Output Operator [FS_174] + Limit [LIM_173] (rows=100 width=492) Number of rows:100 - Select Operator [SEL_170] (rows=720 width=492) + Select Operator [SEL_172] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_169] - Select Operator [SEL_168] (rows=720 width=492) + SHUFFLE [RS_171] + Select Operator [SEL_170] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_167] (rows=720 width=304) + Top N Key Operator [TNK_169] (rows=720 width=304) keys:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN (((grouping(_col3, 1L) + grouping(_col3, 0L)) = 0L)) THEN (_col0) ELSE (null) END, rank_window_0,top n:100 - PTF Operator [PTF_166] (rows=720 width=304) + PTF Operator [PTF_168] (rows=720 width=304) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_165] (rows=720 width=304) + Select Operator [SEL_167] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_166] PartitionCols:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_163] (rows=720 width=304) + Select Operator [SEL_165] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_162] (rows=720 width=304) + Group By Operator [GBY_164] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_50] @@ -129,90 +129,92 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L Select Operator [SEL_47] (rows=525329897 width=290) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_137] (rows=525329897 width=290) + Merge Join Operator [MERGEJOIN_138] (rows=525329897 width=290) Conds:RS_44._col1=RS_45._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_133] (rows=525329897 width=110) - Conds:RS_148._col0=RS_140._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_134] (rows=525329897 width=110) + Conds:RS_149._col0=RS_141._col0(Inner),Output:["_col1","_col2"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_139] (rows=317 width=8) + Select Operator [SEL_140] (rows=317 width=8) Output:["_col0"] - Filter Operator [FIL_138] (rows=317 width=8) + Filter Operator [FIL_139] (rows=317 width=8) predicate:d_month_seq BETWEEN 1212 AND 1223 TableScan [TS_3] (rows=73049 width=8) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] + SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_147] (rows=525329897 width=114) + Select Operator [SEL_148] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_146] (rows=525329897 width=114) + Filter Operator [FIL_147] (rows=525329897 width=114) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_42_d1_d_date_sk_min) AND DynamicValue(RS_42_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_42_d1_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_145] - Group By Operator [GBY_144] (rows=1 width=12) + BROADCAST [RS_146] + Group By Operator [GBY_145] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - Group By Operator [GBY_142] (rows=1 width=12) + SHUFFLE [RS_144] + Group By Operator [GBY_143] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_141] (rows=317 width=4) + Select Operator [SEL_142] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_139] + Please refer to the previous Select Operator [SEL_140] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_136] (rows=556 width=188) - Conds:RS_161._col2=RS_158._col0(Inner),Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_137] (rows=556 width=188) + Conds:RS_163._col2=RS_160._col0(Inner),Output:["_col0","_col1","_col2"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_161] + SHUFFLE [RS_163] PartitionCols:_col2 - Select Operator [SEL_160] (rows=1704 width=188) + Select Operator [SEL_162] (rows=1704 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_159] (rows=1704 width=188) + Filter Operator [FIL_161] (rows=1704 width=188) predicate:s_state is not null TableScan [TS_6] (rows=1704 width=188) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county","s_state"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_158] + SHUFFLE [RS_160] PartitionCols:_col0 - Select Operator [SEL_157] (rows=16 width=86) + Select Operator [SEL_159] (rows=16 width=86) Output:["_col0"] - Filter Operator [FIL_156] (rows=16 width=198) + Filter Operator [FIL_158] (rows=16 width=198) predicate:(rank_window_0 <= 5) - PTF Operator [PTF_155] (rows=49 width=198) + PTF Operator [PTF_157] (rows=49 width=198) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] - Select Operator [SEL_154] (rows=49 width=198) + Select Operator [SEL_156] (rows=49 width=198) Output:["_col0","_col1"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_155] PartitionCols:_col0 - Group By Operator [GBY_152] (rows=49 width=198) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Group By Operator [GBY_25] (rows=19404 width=198) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_135] (rows=525329897 width=192) - Conds:RS_21._col1=RS_151._col0(Inner),Output:["_col2","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_133] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] - PartitionCols:_col0 - Select Operator [SEL_150] (rows=1704 width=90) - Output:["_col0","_col1"] - Filter Operator [FIL_149] (rows=1704 width=90) - predicate:s_state is not null - TableScan [TS_15] (rows=1704 width=90) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + Top N Key Operator [TNK_154] (rows=49 width=198) + PartitionCols:_col0,keys:_col0, _col1,top n:6 + Group By Operator [GBY_153] (rows=49 width=198) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Group By Operator [GBY_25] (rows=19404 width=198) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_136] (rows=525329897 width=192) + Conds:RS_21._col1=RS_152._col0(Inner),Output:["_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Please refer to the previous Merge Join Operator [MERGEJOIN_134] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col0 + Select Operator [SEL_151] (rows=1704 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_150] (rows=1704 width=90) + predicate:s_state is not null + TableScan [TS_15] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] diff --git ql/src/test/results/clientpositive/perf/tez/query44.q.out ql/src/test/results/clientpositive/perf/tez/query44.q.out index a0defab8b3..646bb9341e 100644 --- ql/src/test/results/clientpositive/perf/tez/query44.q.out +++ ql/src/test/results/clientpositive/perf/tez/query44.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -91,10 +91,10 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_148] - Limit [LIM_147] (rows=100 width=218) + File Output Operator [FS_150] + Limit [LIM_149] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_146] (rows=6951 width=218) + Select Operator [SEL_148] (rows=6951 width=218) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_73] @@ -102,102 +102,106 @@ Stage-0 Output:["_col0","_col1","_col2"] Top N Key Operator [TNK_99] (rows=6951 width=218) keys:_col3,top n:100 - Merge Join Operator [MERGEJOIN_114] (rows=6951 width=218) + Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218) Conds:RS_69._col3=RS_70._col3(Inner),Output:["_col1","_col3","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_111] (rows=6951 width=111) - Conds:RS_117._col0=RS_140._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_113] (rows=6951 width=111) + Conds:RS_119._col0=RS_142._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + SHUFFLE [RS_119] PartitionCols:_col0 - Select Operator [SEL_116] (rows=462000 width=111) + Select Operator [SEL_118] (rows=462000 width=111) Output:["_col0","_col1"] - Filter Operator [FIL_115] (rows=462000 width=111) + Filter Operator [FIL_117] (rows=462000 width=111) predicate:i_item_sk is not null TableScan [TS_0] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] + SHUFFLE [RS_142] PartitionCols:_col0 - Select Operator [SEL_139] (rows=6951 width=8) + Select Operator [SEL_141] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_138] (rows=6951 width=116) + Filter Operator [FIL_140] (rows=6951 width=116) predicate:((rank_window_0 < 11) and _col0 is not null) - PTF Operator [PTF_137] (rows=20854 width=116) + PTF Operator [PTF_139] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_136] (rows=20854 width=116) + Select Operator [SEL_138] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:0 - Filter Operator [FIL_23] (rows=20854 width=228) - predicate:(_col1 > (0.9 * _col2)) - Merge Join Operator [MERGEJOIN_110] (rows=62562 width=228) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_135] - Select Operator [SEL_134] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_133] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_132] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_131] (rows=1 width=124) + Top N Key Operator [TNK_100] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Filter Operator [FIL_23] (rows=20854 width=228) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_137] + Select Operator [SEL_136] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_135] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_134] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_133] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + PartitionCols:_col0 + Group By Operator [GBY_131] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_130] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_129] (rows=287946 width=114) + predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) + TableScan [TS_11] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_128] + Select Operator [SEL_127] (rows=62562 width=116) + Output:["_col0","_col1"] + Filter Operator [FIL_126] (rows=62562 width=124) + predicate:(_col1 is not null and _col2 is not null) + Group By Operator [GBY_125] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] PartitionCols:_col0 - Group By Operator [GBY_129] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_128] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_127] (rows=287946 width=114) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_11] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_126] - Select Operator [SEL_125] (rows=62562 width=116) - Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=62562 width=124) - predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_123] (rows=62562 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] - PartitionCols:_col0 - Group By Operator [GBY_121] (rows=3199976 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_120] (rows=6399952 width=114) - Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_119] (rows=6399952 width=114) - predicate:(ss_store_sk = 410) - TableScan [TS_3] (rows=575995635 width=114) - default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] + Group By Operator [GBY_123] (rows=3199976 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk + Select Operator [SEL_122] (rows=6399952 width=114) + Output:["ss_item_sk","ss_net_profit"] + Filter Operator [FIL_121] (rows=6399952 width=114) + predicate:(ss_store_sk = 410) + TableScan [TS_3] (rows=575995635 width=114) + default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_70] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_113] (rows=6951 width=111) - Conds:RS_118._col0=RS_145._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_115] (rows=6951 width=111) + Conds:RS_120._col0=RS_147._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + SHUFFLE [RS_120] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_116] + Please refer to the previous Select Operator [SEL_118] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_145] + SHUFFLE [RS_147] PartitionCols:_col0 - Select Operator [SEL_144] (rows=6951 width=8) + Select Operator [SEL_146] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_143] (rows=6951 width=116) + Filter Operator [FIL_145] (rows=6951 width=116) predicate:((rank_window_0 < 11) and _col0 is not null) - PTF Operator [PTF_142] (rows=20854 width=116) + PTF Operator [PTF_144] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_141] (rows=20854 width=116) + Select Operator [SEL_143] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_23] + Top N Key Operator [TNK_101] (rows=20854 width=228) + PartitionCols:0,keys:0, _col1,top n:11 + Please refer to the previous Filter Operator [FIL_23] diff --git ql/src/test/results/clientpositive/perf/tez/query49.q.out ql/src/test/results/clientpositive/perf/tez/query49.q.out index 239592bc1d..94dadbab51 100644 --- ql/src/test/results/clientpositive/perf/tez/query49.q.out +++ ql/src/test/results/clientpositive/perf/tez/query49.q.out @@ -299,251 +299,251 @@ Stage-0 limit:100 Stage-1 Reducer 11 vectorized - File Output Operator [FS_312] - Limit [LIM_311] (rows=100 width=215) + File Output Operator [FS_310] + Limit [LIM_309] (rows=100 width=215) Number of rows:100 - Select Operator [SEL_310] (rows=40436 width=215) + Select Operator [SEL_308] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] - Select Operator [SEL_308] (rows=40436 width=215) + SHUFFLE [RS_307] + Select Operator [SEL_306] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_307] (rows=40436 width=215) + Group By Operator [GBY_305] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 9 [SIMPLE_EDGE] <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_353] + Reduce Output Operator [RS_351] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_352] (rows=40436 width=215) + Group By Operator [GBY_350] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_351] (rows=40436 width=214) - keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_350] (rows=14232 width=213) + Top N Key Operator [TNK_349] (rows=40436 width=214) + keys:_col0, _col3, _col4,top n:100 + Select Operator [SEL_348] (rows=14232 width=213) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_349] (rows=14232 width=248) + Filter Operator [FIL_347] (rows=14232 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_348] (rows=21349 width=248) + PTF Operator [PTF_346] (rows=21349 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_347] (rows=21349 width=248) + Select Operator [SEL_345] (rows=21349 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_346] + SHUFFLE [RS_344] PartitionCols:0 - Select Operator [SEL_345] (rows=21349 width=244) + Select Operator [SEL_343] (rows=21349 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_344] (rows=21349 width=244) + PTF Operator [PTF_342] (rows=21349 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_343] (rows=21349 width=244) + Select Operator [SEL_341] (rows=21349 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_342] + SHUFFLE [RS_340] PartitionCols:0 - Group By Operator [GBY_341] (rows=21349 width=244) + Group By Operator [GBY_339] (rows=21349 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_86] PartitionCols:_col0 Group By Operator [GBY_85] (rows=426980 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)","sum(_col3)","sum(_col9)","sum(_col4)"],keys:_col1 - Merge Join Operator [MERGEJOIN_239] (rows=20856667 width=236) - Conds:RS_81._col1, _col2=RS_340._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] + Merge Join Operator [MERGEJOIN_237] (rows=20856667 width=236) + Conds:RS_81._col1, _col2=RS_338._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_340] + SHUFFLE [RS_338] PartitionCols:_col0, _col1 - Select Operator [SEL_339] (rows=19197050 width=124) + Select Operator [SEL_337] (rows=19197050 width=124) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_338] (rows=19197050 width=119) + Filter Operator [FIL_336] (rows=19197050 width=119) predicate:((sr_return_amt > 10000) and sr_ticket_number is not null and sr_item_sk is not null) TableScan [TS_75] (rows=57591150 width=119) default@store_returns,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_81] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_238] (rows=61119617 width=124) - Conds:RS_337._col0=RS_274._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_236] (rows=61119617 width=124) + Conds:RS_335._col0=RS_272._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_274] + PARTITION_ONLY_SHUFFLE [RS_272] PartitionCols:_col0 - Select Operator [SEL_269] (rows=50 width=4) + Select Operator [SEL_267] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_268] (rows=50 width=12) + Filter Operator [FIL_266] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 12) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_337] + SHUFFLE [RS_335] PartitionCols:_col0 - Select Operator [SEL_336] (rows=61119617 width=127) + Select Operator [SEL_334] (rows=61119617 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_335] (rows=61119617 width=229) + Filter Operator [FIL_333] (rows=61119617 width=229) predicate:((ss_net_profit > 1) and (ss_net_paid > 0) and (ss_quantity > 0) and ss_sold_date_sk is not null and ss_ticket_number is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_79_date_dim_d_date_sk_min) AND DynamicValue(RS_79_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_79_date_dim_d_date_sk_bloom_filter))) TableScan [TS_69] (rows=575995635 width=229) default@store_sales,sts,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_net_paid","ss_net_profit"] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_334] - Group By Operator [GBY_333] (rows=1 width=12) + BROADCAST [RS_332] + Group By Operator [GBY_331] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_281] - Group By Operator [GBY_278] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_279] + Group By Operator [GBY_276] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_275] (rows=50 width=4) + Select Operator [SEL_273] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_269] + Please refer to the previous Select Operator [SEL_267] <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_306] + Reduce Output Operator [RS_304] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_305] (rows=40436 width=215) + Group By Operator [GBY_303] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_304] (rows=40436 width=214) - keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_303] (rows=26204 width=215) + Top N Key Operator [TNK_302] (rows=40436 width=214) + keys:_col0, _col3, _col4,top n:100 + Select Operator [SEL_301] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_302] (rows=26204 width=215) + Group By Operator [GBY_300] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 7 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_332] + Reduce Output Operator [RS_330] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_331] (rows=26204 width=215) + Group By Operator [GBY_329] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_330] (rows=12574 width=215) + Select Operator [SEL_328] (rows=12574 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_329] (rows=12574 width=248) + Filter Operator [FIL_327] (rows=12574 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_328] (rows=18863 width=248) + PTF Operator [PTF_326] (rows=18863 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_327] (rows=18863 width=248) + Select Operator [SEL_325] (rows=18863 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] + SHUFFLE [RS_324] PartitionCols:0 - Select Operator [SEL_325] (rows=18863 width=244) + Select Operator [SEL_323] (rows=18863 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_324] (rows=18863 width=244) + PTF Operator [PTF_322] (rows=18863 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_323] (rows=18863 width=244) + Select Operator [SEL_321] (rows=18863 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] + SHUFFLE [RS_320] PartitionCols:0 - Group By Operator [GBY_321] (rows=18863 width=244) + Group By Operator [GBY_319] (rows=18863 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_48] PartitionCols:_col0 Group By Operator [GBY_47] (rows=169767 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)","sum(_col3)","sum(_col9)","sum(_col4)"],keys:_col1 - Merge Join Operator [MERGEJOIN_237] (rows=9599627 width=236) - Conds:RS_43._col1, _col2=RS_320._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] + Merge Join Operator [MERGEJOIN_235] (rows=9599627 width=236) + Conds:RS_43._col1, _col2=RS_318._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + SHUFFLE [RS_318] PartitionCols:_col0, _col1 - Select Operator [SEL_319] (rows=9599627 width=124) + Select Operator [SEL_317] (rows=9599627 width=124) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_318] (rows=9599627 width=121) + Filter Operator [FIL_316] (rows=9599627 width=121) predicate:((cr_return_amount > 10000) and cr_order_number is not null and cr_item_sk is not null) TableScan [TS_37] (rows=28798881 width=121) default@catalog_returns,cr,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_236] (rows=31838858 width=124) - Conds:RS_317._col0=RS_272._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_234] (rows=31838858 width=124) + Conds:RS_315._col0=RS_270._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_272] + PARTITION_ONLY_SHUFFLE [RS_270] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_269] + Please refer to the previous Select Operator [SEL_267] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_317] + SHUFFLE [RS_315] PartitionCols:_col0 - Select Operator [SEL_316] (rows=31838858 width=127) + Select Operator [SEL_314] (rows=31838858 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_315] (rows=31838858 width=239) + Filter Operator [FIL_313] (rows=31838858 width=239) predicate:((cs_net_profit > 1) and (cs_net_paid > 0) and (cs_quantity > 0) and cs_sold_date_sk is not null and cs_order_number is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_41_date_dim_d_date_sk_min) AND DynamicValue(RS_41_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_41_date_dim_d_date_sk_bloom_filter))) TableScan [TS_31] (rows=287989836 width=239) default@catalog_sales,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_net_paid","cs_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_314] - Group By Operator [GBY_313] (rows=1 width=12) + BROADCAST [RS_312] + Group By Operator [GBY_311] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_280] - Group By Operator [GBY_277] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_278] + Group By Operator [GBY_275] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_273] (rows=50 width=4) + Select Operator [SEL_271] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_269] + Please refer to the previous Select Operator [SEL_267] <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_301] + Reduce Output Operator [RS_299] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_300] (rows=26204 width=215) + Group By Operator [GBY_298] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_299] (rows=13630 width=211) + Select Operator [SEL_297] (rows=13630 width=211) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_298] (rows=13630 width=248) + Filter Operator [FIL_296] (rows=13630 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_297] (rows=20445 width=248) + PTF Operator [PTF_295] (rows=20445 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_296] (rows=20445 width=248) + Select Operator [SEL_294] (rows=20445 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_295] + SHUFFLE [RS_293] PartitionCols:0 - Select Operator [SEL_294] (rows=20445 width=244) + Select Operator [SEL_292] (rows=20445 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_293] (rows=20445 width=244) + PTF Operator [PTF_291] (rows=20445 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_292] (rows=20445 width=244) + Select Operator [SEL_290] (rows=20445 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_291] + SHUFFLE [RS_289] PartitionCols:0 - Group By Operator [GBY_290] (rows=20445 width=244) + Group By Operator [GBY_288] (rows=20445 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Group By Operator [GBY_16] (rows=102225 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)","sum(_col3)","sum(_col9)","sum(_col4)"],keys:_col1 - Merge Join Operator [MERGEJOIN_235] (rows=5227456 width=236) - Conds:RS_12._col1, _col2=RS_289._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] + Merge Join Operator [MERGEJOIN_233] (rows=5227456 width=236) + Conds:RS_12._col1, _col2=RS_287._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + SHUFFLE [RS_287] PartitionCols:_col0, _col1 - Select Operator [SEL_288] (rows=4799489 width=124) + Select Operator [SEL_286] (rows=4799489 width=124) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_287] (rows=4799489 width=118) + Filter Operator [FIL_285] (rows=4799489 width=118) predicate:((wr_return_amt > 10000) and wr_order_number is not null and wr_item_sk is not null) TableScan [TS_6] (rows=14398467 width=118) default@web_returns,wr,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_234] (rows=15996318 width=124) - Conds:RS_286._col0=RS_270._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_232] (rows=15996318 width=124) + Conds:RS_284._col0=RS_268._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_270] + PARTITION_ONLY_SHUFFLE [RS_268] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_269] + Please refer to the previous Select Operator [SEL_267] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] + SHUFFLE [RS_284] PartitionCols:_col0 - Select Operator [SEL_285] (rows=15996318 width=127) + Select Operator [SEL_283] (rows=15996318 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_284] (rows=15996318 width=239) + Filter Operator [FIL_282] (rows=15996318 width=239) predicate:((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_sold_date_sk is not null and ws_order_number is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=144002668 width=239) default@web_sales,ws,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_net_paid","ws_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_283] - Group By Operator [GBY_282] (rows=1 width=12) + BROADCAST [RS_281] + Group By Operator [GBY_280] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_279] - Group By Operator [GBY_276] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_277] + Group By Operator [GBY_274] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_271] (rows=50 width=4) + Select Operator [SEL_269] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_269] + Please refer to the previous Select Operator [SEL_267] diff --git ql/src/test/results/clientpositive/perf/tez/query67.q.out ql/src/test/results/clientpositive/perf/tez/query67.q.out index 54d76e9c1a..bd225cdf87 100644 --- ql/src/test/results/clientpositive/perf/tez/query67.q.out +++ ql/src/test/results/clientpositive/perf/tez/query67.q.out @@ -111,92 +111,94 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_113] - Limit [LIM_112] (rows=100 width=617) + File Output Operator [FS_115] + Limit [LIM_114] (rows=100 width=617) Number of rows:100 - Select Operator [SEL_111] (rows=1575989691 width=617) + Select Operator [SEL_113] (rows=1575989691 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] - Select Operator [SEL_109] (rows=1575989691 width=617) + SHUFFLE [RS_112] + Select Operator [SEL_111] (rows=1575989691 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Top N Key Operator [TNK_108] (rows=1575989691 width=613) + Top N Key Operator [TNK_110] (rows=1575989691 width=613) keys:_col2, _col1, _col0, _col3, _col4, _col6, _col5, _col7, _col8, rank_window_0,top n:100 - Filter Operator [FIL_107] (rows=1575989691 width=613) + Filter Operator [FIL_109] (rows=1575989691 width=613) predicate:(rank_window_0 <= 100) - PTF Operator [PTF_106] (rows=4727969073 width=613) + PTF Operator [PTF_108] (rows=4727969073 width=613) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col8 DESC NULLS LAST","partition by:":"_col2"}] - Select Operator [SEL_105] (rows=4727969073 width=613) + Select Operator [SEL_107] (rows=4727969073 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_104] + SHUFFLE [RS_106] PartitionCols:_col2 - Select Operator [SEL_103] (rows=4727969073 width=613) + Select Operator [SEL_105] (rows=4727969073 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_102] (rows=4727969073 width=621) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Group By Operator [GBY_22] (rows=4727969073 width=621) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col11, _col12, _col13, _col14, _col5, _col6, _col7, _col9, 0L - Merge Join Operator [MERGEJOIN_84] (rows=525329897 width=613) - Conds:RS_18._col1=RS_101._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_101] - PartitionCols:_col0 - Select Operator [SEL_100] (rows=462000 width=393) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_99] (rows=462000 width=393) - predicate:i_item_sk is not null - TableScan [TS_9] (rows=462000 width=393) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_83] (rows=525329897 width=228) - Conds:RS_15._col2=RS_98._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_98] - PartitionCols:_col0 - Select Operator [SEL_97] (rows=1704 width=104) - Output:["_col0","_col1"] - Filter Operator [FIL_96] (rows=1704 width=104) - predicate:s_store_sk is not null - TableScan [TS_6] (rows=1704 width=104) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_82] (rows=525329897 width=131) - Conds:RS_95._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_87] - PartitionCols:_col0 - Select Operator [SEL_86] (rows=317 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_85] (rows=317 width=20) - predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=20) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_95] - PartitionCols:_col0 - Select Operator [SEL_94] (rows=525329897 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_93] (rows=525329897 width=122) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_92] - Group By Operator [GBY_91] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_90] - Group By Operator [GBY_89] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=317 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_86] + Top N Key Operator [TNK_104] (rows=4727969073 width=621) + PartitionCols:_col2,keys:_col2, _col9,top n:101 + Group By Operator [GBY_103] (rows=4727969073 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Group By Operator [GBY_22] (rows=4727969073 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col11, _col12, _col13, _col14, _col5, _col6, _col7, _col9, 0L + Merge Join Operator [MERGEJOIN_85] (rows=525329897 width=613) + Conds:RS_18._col1=RS_102._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_101] (rows=462000 width=393) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_100] (rows=462000 width=393) + predicate:i_item_sk is not null + TableScan [TS_9] (rows=462000 width=393) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_84] (rows=525329897 width=228) + Conds:RS_15._col2=RS_99._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_99] + PartitionCols:_col0 + Select Operator [SEL_98] (rows=1704 width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_97] (rows=1704 width=104) + predicate:s_store_sk is not null + TableScan [TS_6] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_83] (rows=525329897 width=131) + Conds:RS_96._col0=RS_88._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_88] + PartitionCols:_col0 + Select Operator [SEL_87] (rows=317 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_86] (rows=317 width=20) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=20) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_96] + PartitionCols:_col0 + Select Operator [SEL_95] (rows=525329897 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_94] (rows=525329897 width=122) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_93] + Group By Operator [GBY_92] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_91] + Group By Operator [GBY_90] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_89] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_87] diff --git ql/src/test/results/clientpositive/perf/tez/query70.q.out ql/src/test/results/clientpositive/perf/tez/query70.q.out index 23f9166a4f..6107ec0367 100644 --- ql/src/test/results/clientpositive/perf/tez/query70.q.out +++ ql/src/test/results/clientpositive/perf/tez/query70.q.out @@ -100,27 +100,27 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_170] - Limit [LIM_169] (rows=100 width=492) + File Output Operator [FS_172] + Limit [LIM_171] (rows=100 width=492) Number of rows:100 - Select Operator [SEL_168] (rows=720 width=492) + Select Operator [SEL_170] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] - Select Operator [SEL_166] (rows=720 width=492) + SHUFFLE [RS_169] + Select Operator [SEL_168] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_165] (rows=720 width=304) + Top N Key Operator [TNK_167] (rows=720 width=304) keys:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN (((grouping(_col3, 1L) + grouping(_col3, 0L)) = 0L)) THEN (_col0) ELSE (null) END, rank_window_0,top n:100 - PTF Operator [PTF_164] (rows=720 width=304) + PTF Operator [PTF_166] (rows=720 width=304) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_163] (rows=720 width=304) + Select Operator [SEL_165] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + SHUFFLE [RS_164] PartitionCols:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_161] (rows=720 width=304) + Select Operator [SEL_163] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_160] (rows=720 width=304) + Group By Operator [GBY_162] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_49] @@ -129,89 +129,91 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L Select Operator [SEL_46] (rows=171536292 width=280) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_135] (rows=171536292 width=280) - Conds:RS_43._col7=RS_159._col0(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_136] (rows=171536292 width=280) + Conds:RS_43._col7=RS_161._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_161] PartitionCols:_col0 - Select Operator [SEL_158] (rows=16 width=86) + Select Operator [SEL_160] (rows=16 width=86) Output:["_col0"] - Filter Operator [FIL_157] (rows=16 width=198) + Filter Operator [FIL_159] (rows=16 width=198) predicate:(rank_window_0 <= 5) - PTF Operator [PTF_156] (rows=49 width=198) + PTF Operator [PTF_158] (rows=49 width=198) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] - Select Operator [SEL_155] (rows=49 width=198) + Select Operator [SEL_157] (rows=49 width=198) Output:["_col0","_col1"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_154] + SHUFFLE [RS_156] PartitionCols:_col0 - Group By Operator [GBY_153] (rows=49 width=198) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Group By Operator [GBY_25] (rows=19404 width=198) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_134] (rows=525329897 width=192) - Conds:RS_21._col1=RS_152._col0(Inner),Output:["_col2","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_131] (rows=525329897 width=110) - Conds:RS_146._col0=RS_138._col0(Inner),Output:["_col1","_col2"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] - PartitionCols:_col0 - Select Operator [SEL_137] (rows=317 width=8) - Output:["_col0"] - Filter Operator [FIL_136] (rows=317 width=8) - predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=8) - default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] - PartitionCols:_col0 - Select Operator [SEL_145] (rows=525329897 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_144] (rows=525329897 width=114) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_143] - Group By Operator [GBY_142] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] - Group By Operator [GBY_140] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_139] (rows=317 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_137] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] - PartitionCols:_col0 - Select Operator [SEL_151] (rows=1704 width=90) - Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=1704 width=90) - predicate:(s_store_sk is not null and s_state is not null) - TableScan [TS_15] (rows=1704 width=90) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + Top N Key Operator [TNK_155] (rows=49 width=198) + PartitionCols:_col0,keys:_col0, _col1,top n:6 + Group By Operator [GBY_154] (rows=49 width=198) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Group By Operator [GBY_25] (rows=19404 width=198) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_135] (rows=525329897 width=192) + Conds:RS_21._col1=RS_153._col0(Inner),Output:["_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_132] (rows=525329897 width=110) + Conds:RS_147._col0=RS_139._col0(Inner),Output:["_col1","_col2"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_139] + PartitionCols:_col0 + Select Operator [SEL_138] (rows=317 width=8) + Output:["_col0"] + Filter Operator [FIL_137] (rows=317 width=8) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + PartitionCols:_col0 + Select Operator [SEL_146] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_145] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_144] + Group By Operator [GBY_143] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] + Group By Operator [GBY_141] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_140] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_138] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] + PartitionCols:_col0 + Select Operator [SEL_152] (rows=1704 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_151] (rows=1704 width=90) + predicate:(s_store_sk is not null and s_state is not null) + TableScan [TS_15] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_132] (rows=525329897 width=290) - Conds:RS_40._col1=RS_149._col0(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_133] (rows=525329897 width=290) + Conds:RS_40._col1=RS_150._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_131] + Please refer to the previous Merge Join Operator [MERGEJOIN_132] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_148] (rows=1704 width=188) + Select Operator [SEL_149] (rows=1704 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_147] (rows=1704 width=188) + Filter Operator [FIL_148] (rows=1704 width=188) predicate:(s_state is not null and s_store_sk is not null) TableScan [TS_6] (rows=1704 width=188) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county","s_state"] diff --git ql/src/test/results/clientpositive/topnkey_windowing.q.out ql/src/test/results/clientpositive/topnkey_windowing.q.out new file mode 100644 index 0000000000..c186790bea --- /dev/null +++ ql/src/test/results/clientpositive/topnkey_windowing.q.out @@ -0,0 +1,576 @@ +PREHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@topnkey_windowing +PREHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 104), + ('A', 109), + ('A', 109), + ('A', 103), + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 101), + ('A', 101), + ('A', 114), + ('A', 120), + ('B', 105), + ('B', 106), + ('B', 106), + ('B', NULL), + ('B', 106), + ('A', 107), + ('B', 108), + ('A', 102), + ('B', 110), + (NULL, NULL), + (NULL, 109), + ('A', 109) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 104), + ('A', 109), + ('A', 109), + ('A', 103), + (NULL, NULL), + (NULL, 109), + ('A', 109), + ('A', 101), + ('A', 101), + ('A', 114), + ('A', 120), + ('B', 105), + ('B', 106), + ('B', 106), + ('B', NULL), + ('B', 106), + ('A', 107), + ('B', 108), + ('A', 102), + ('B', 110), + (NULL, NULL), + (NULL, 109), + ('A', 109) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@topnkey_windowing +POSTHOOK: Lineage: topnkey_windowing.tw_code SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_value SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_code (type: string), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT * +FROM (SELECT `tw_code`, RANK() OVER (PARTITION BY 0 ORDER BY `tw_value` ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING) AS `rank_window_0` +FROM `default`.`topnkey_windowing`) AS `t` +WHERE `rank_window_0` <= 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: 0 (type: int), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + TopN: 4 + TopN Hash Memory Usage: 0.1 + value expressions: tw_code (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: topnkey_windowing + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.topnkey_windowing + name: default.topnkey_windowing + Truncated Path -> Alias: + /topnkey_windowing [$hdt$_0:topnkey_windowing] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + isSamplingPred: false + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +A 1 +A 1 +A 3 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +A 1 +A 1 +A 3 +PREHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_code (type: string), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: dense_rank_window_0 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (dense_rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), dense_rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +NULL 2 +NULL 2 +NULL 2 +A 1 +A 1 +A 2 +A 3 +B 1 +B 2 +B 2 +B 2 +B 3 +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +NULL 2 +NULL 2 +NULL 2 +A 1 +A 1 +A 2 +A 3 +B 1 +B 2 +B 2 +B 2 +B 3 +PREHOOK: query: DROP TABLE topnkey_windowing +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@topnkey_windowing +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: DROP TABLE topnkey_windowing +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@topnkey_windowing +POSTHOOK: Output: default@topnkey_windowing diff --git ql/src/test/results/clientpositive/topnkey_windowing_order.q.out ql/src/test/results/clientpositive/topnkey_windowing_order.q.out new file mode 100644 index 0000000000..7a9a67d001 --- /dev/null +++ ql/src/test/results/clientpositive/topnkey_windowing_order.q.out @@ -0,0 +1,517 @@ +PREHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@topnkey_windowing +PREHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 104, 9), + ('A', 'D', 109, 9), + ('A', 'C', 109, 9), + ('A', 'C', 103, 9), + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 101, 9), + ('A', 'D', 101, 9), + ('A', 'D', 114, 9), + ('A', 'D', 120, 9), + ('B', 'E', 105, 9), + ('B', 'E', 106, 9), + ('B', 'E', 106, 9), + ('B', 'E', NULL, NULL), + ('B', 'E', 106, 9), + ('A', 'C', 107, 9), + ('B', 'E', 108, 9), + ('A', 'C', 102, 9), + ('B', 'E', 110, 9), + (NULL, NULL, NULL, NULL), + (NULL, NULL, 109, 9), + ('A', 'D', 109, 9) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: INSERT INTO topnkey_windowing VALUES + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 104, 9), + ('A', 'D', 109, 9), + ('A', 'C', 109, 9), + ('A', 'C', 103, 9), + (NULL, NULL, NULL, NULL), + (NULL, 'D', 109, 9), + ('A', 'D', 109, 9), + ('A', 'D', 101, 9), + ('A', 'D', 101, 9), + ('A', 'D', 114, 9), + ('A', 'D', 120, 9), + ('B', 'E', 105, 9), + ('B', 'E', 106, 9), + ('B', 'E', 106, 9), + ('B', 'E', NULL, NULL), + ('B', 'E', 106, 9), + ('A', 'C', 107, 9), + ('B', 'E', 108, 9), + ('A', 'C', 102, 9), + ('B', 'E', 110, 9), + (NULL, NULL, NULL, NULL), + (NULL, NULL, 109, 9), + ('A', 'D', 109, 9) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@topnkey_windowing +POSTHOOK: Lineage: topnkey_windowing.tw_a SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_b SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_v1 SCRIPT [] +POSTHOOK: Lineage: topnkey_windowing.tw_v2 SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_a (type: string), tw_v1 (type: double) + null sort order: aa + sort order: ++ + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 3 +B 3 +B 3 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 3 +B 3 +B 3 +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_a (type: string), tw_v1 (type: double), tw_v2 (type: double) + null sort order: aza + sort order: ++- + Map-reduce partition columns: tw_a (type: string) + Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: double) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 26 Data size: 9121 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double, _col3: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST, _col3 DESC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2, _col3 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 9121 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2681 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_a (type: string), tw_b (type: string), tw_v1 (type: double) + null sort order: aaz + sort order: +++ + Map-reduce partition columns: tw_a (type: string), tw_b (type: string) + Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 10892 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col0, _col1 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 10892 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 3220 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 2 +NULL 2 +NULL 2 +NULL 1 +NULL 1 +A 1 +A 2 +A 3 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_a, ranking +FROM ( + SELECT tw_a AS tw_a, + rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 2 +NULL 2 +NULL 2 +NULL 1 +NULL 1 +A 1 +A 2 +A 3 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: DROP TABLE topnkey_windowing +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@topnkey_windowing +PREHOOK: Output: default@topnkey_windowing +POSTHOOK: query: DROP TABLE topnkey_windowing +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@topnkey_windowing +POSTHOOK: Output: default@topnkey_windowing