diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java index bd8ff6285e..dd66dfcd72 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java @@ -79,7 +79,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { partitionKeyObjectInspectors, partitionCurrentKeyObjectInspectors); keyWrapperComparator = new KeyWrapperComparator( - keyObjectInspectors, currentKeyObjectInspectors, columnSortOrder.toString(), nullSortOrder.toString()); + keyObjectInspectors, currentKeyObjectInspectors, columnSortOrder, nullSortOrder); this.topNKeyFilters = new HashMap<>(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java index f03d65030d..7feadd3137 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java @@ -17,9 +17,15 @@ */ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; + import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.KeyWrapper; +import org.apache.hadoop.hive.ql.exec.KeyWrapperComparator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TopNKeyFilter; import org.apache.hadoop.hive.ql.exec.TopNKeyOperator; @@ -45,8 +51,11 @@ // Batch processing private transient int[] temporarySelected; + private transient VectorHashKeyWrapperBatch partitionKeyWrapperBatch; private transient VectorHashKeyWrapperBatch keyWrappersBatch; - private transient TopNKeyFilter topNKeyFilter; + private transient Map topNKeyFilters; + 
private transient Comparator keyWrapperComparator; + public VectorTopNKeyOperator(CompilationOpContext ctx, OperatorDesc conf, VectorizationContext vContext, VectorDesc vectorDesc) { @@ -72,17 +81,24 @@ protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); VectorExpression[] keyExpressions = vectorDesc.getKeyExpressions(); - VectorExpression.doTransientInit(keyExpressions, hconf); - for (VectorExpression keyExpression : keyExpressions) { - keyExpression.init(hconf); - } + initKeyExpressions(hconf, keyExpressions); + + VectorExpression[] partitionKeyExpressions = vectorDesc.getPartitionKeyColumns(); + initKeyExpressions(hconf, partitionKeyExpressions); temporarySelected = new int [VectorizedRowBatch.DEFAULT_SIZE]; keyWrappersBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions); - this.topNKeyFilter = new TopNKeyFilter(conf.getTopN(), keyWrappersBatch.getComparator( - conf.getColumnSortOrder(), - conf.getNullOrder())); + keyWrapperComparator = keyWrappersBatch.getComparator(conf.getColumnSortOrder(), conf.getNullOrder()); + partitionKeyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(partitionKeyExpressions); + topNKeyFilters = new HashMap<>(); + } + + private void initKeyExpressions(Configuration hconf, VectorExpression[] keyExpressions) throws HiveException { + VectorExpression.doTransientInit(keyExpressions, hconf); + for (VectorExpression keyExpression : keyExpressions) { + keyExpression.init(hconf); + } } @Override @@ -101,6 +117,9 @@ public void process(Object data, int tag) throws HiveException { keyExpression.evaluate(batch); } + partitionKeyWrapperBatch.evaluateBatch(batch); + VectorHashKeyWrapperBase[] partitionKeyWrappers = partitionKeyWrapperBatch.getVectorHashKeyWrappers(); + keyWrappersBatch.evaluateBatch(batch); VectorHashKeyWrapperBase[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers(); @@ -116,6 +135,12 @@ public void process(Object data, int tag) throws 
HiveException { } // Select a row in the priority queue + TopNKeyFilter topNKeyFilter = topNKeyFilters.get(partitionKeyWrappers[i]); + if (topNKeyFilter == null) { + topNKeyFilter = new TopNKeyFilter(conf.getTopN(), keyWrapperComparator); + topNKeyFilters.put(partitionKeyWrappers[i].copyKey(), topNKeyFilter); + } + if (topNKeyFilter.canForward(keyWrappers[i])) { selected[size++] = j; } @@ -169,8 +194,10 @@ public OperatorType getType() { @Override protected void closeOp(boolean abort) throws HiveException { - LOG.info("Closing TopNKeyFilter: {}.", topNKeyFilter); - topNKeyFilter.clear(); + LOG.info("Closing TopNKeyFilters: {}.", topNKeyFilters); + for (TopNKeyFilter topNKeyFilter : topNKeyFilters.values()) { + topNKeyFilter.clear(); + } super.closeOp(abort); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 27ff0c2484..d7d8b6fee1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -4332,21 +4332,26 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { VectorTopNKeyDesc vectorTopNKeyDesc) throws HiveException { TopNKeyDesc topNKeyDesc = (TopNKeyDesc) topNKeyOperator.getConf(); + VectorExpression[] keyExpressions = getVectorExpressions(vContext, topNKeyDesc.getKeyColumns()); + VectorExpression[] partitionKeyExpressions = getVectorExpressions(vContext, topNKeyDesc.getPartitionKeyColumns()); + + vectorTopNKeyDesc.setKeyExpressions(keyExpressions); + vectorTopNKeyDesc.setPartitionKeyColumns(partitionKeyExpressions); + return OperatorFactory.getVectorOperator( + topNKeyOperator.getCompilationOpContext(), topNKeyDesc, + vContext, vectorTopNKeyDesc); + } + + private static VectorExpression[] getVectorExpressions(VectorizationContext vContext, List<ExprNodeDesc> keyColumns) throws HiveException { VectorExpression[] keyExpressions; - // this will
mark all actual computed columns vContext.markActualScratchColumns(); try { - List keyColumns = topNKeyDesc.getKeyColumns(); keyExpressions = vContext.getVectorExpressionsUpConvertDecimal64(keyColumns); fixDecimalDataTypePhysicalVariations(vContext, keyExpressions); } finally { vContext.freeMarkedScratchColumns(); } - - vectorTopNKeyDesc.setKeyExpressions(keyExpressions); - return OperatorFactory.getVectorOperator( - topNKeyOperator.getCompilationOpContext(), topNKeyDesc, - vContext, vectorTopNKeyDesc); + return keyExpressions; } private static Class findVecAggrClass( diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTopNKeyDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTopNKeyDesc.java index 9a266a0c57..ee5229368e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTopNKeyDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTopNKeyDesc.java @@ -25,6 +25,7 @@ private static final long serialVersionUID = 1L; private VectorExpression[] keyExpressions; + private VectorExpression[] partitionKeyColumns; public VectorTopNKeyDesc() { } @@ -36,4 +37,12 @@ public VectorTopNKeyDesc() { public void setKeyExpressions(VectorExpression[] keyExpressions) { this.keyExpressions = keyExpressions; } + + public VectorExpression[] getPartitionKeyColumns() { + return partitionKeyColumns; + } + + public void setPartitionKeyColumns(VectorExpression[] partitionKeyColumns) { + this.partitionKeyColumns = partitionKeyColumns; + } } diff --git ql/src/test/queries/clientpositive/subquery_in.q ql/src/test/queries/clientpositive/subquery_in.q index 96ed1bae41..a5b3ce7951 100644 --- ql/src/test/queries/clientpositive/subquery_in.q +++ ql/src/test/queries/clientpositive/subquery_in.q @@ -3,7 +3,6 @@ --! 
qt:dataset:lineitem set hive.mapred.mode=nonstrict; set hive.explain.user=false; -set hive.optimize.topnkey=false; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/subquery_notin.q ql/src/test/queries/clientpositive/subquery_notin.q index f25168ab77..f8636453c2 100644 --- ql/src/test/queries/clientpositive/subquery_notin.q +++ ql/src/test/queries/clientpositive/subquery_notin.q @@ -2,7 +2,6 @@ --! qt:dataset:part --! qt:dataset:lineitem set hive.mapred.mode=nonstrict; -set hive.optimize.topnkey=false; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/topnkey_windowing.q ql/src/test/queries/clientpositive/topnkey_windowing.q index a5352d2d6c..8ac85c6dcd 100644 --- ql/src/test/queries/clientpositive/topnkey_windowing.q +++ ql/src/test/queries/clientpositive/topnkey_windowing.q @@ -1,6 +1,5 @@ SET hive.auto.convert.join.noconditionaltask=true; SET hive.auto.convert.join.noconditionaltask.size=1431655765; -SET hive.vectorized.execution.enabled=false; CREATE TABLE topnkey_windowing (tw_code string, tw_value double); @@ -33,6 +32,23 @@ INSERT INTO topnkey_windowing VALUES ('A', 109); SET hive.optimize.topnkey=true; +SET hive.vectorized.execution.enabled=false; +EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.vectorized.execution.enabled=true; EXPLAIN SELECT tw_code, ranking FROM ( @@ -58,6 +74,23 @@ FROM ( SET hive.optimize.topnkey=true; +SET hive.vectorized.execution.enabled=false; +EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SELECT tw_code, ranking +FROM ( + SELECT 
tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3; + +SET hive.vectorized.execution.enabled=true; EXPLAIN extended SELECT tw_code, ranking FROM ( @@ -84,6 +117,7 @@ FROM ( SET hive.optimize.topnkey=true; +SET hive.vectorized.execution.enabled=true; EXPLAIN SELECT tw_code, ranking FROM ( diff --git ql/src/test/queries/clientpositive/vector_windowing_streaming.q ql/src/test/queries/clientpositive/vector_windowing_streaming.q index 2f7b628db3..e1011f9949 100644 --- ql/src/test/queries/clientpositive/vector_windowing_streaming.q +++ ql/src/test/queries/clientpositive/vector_windowing_streaming.q @@ -5,7 +5,6 @@ SET hive.vectorized.execution.enabled=true; SET hive.vectorized.execution.reduce.enabled=true; set hive.vectorized.execution.ptf.enabled=true; set hive.fetch.task.conversion=none; -set hive.optimize.topnkey=false; drop table over10k_n8; diff --git ql/src/test/queries/clientpositive/windowing_filter.q ql/src/test/queries/clientpositive/windowing_filter.q index 14d0c5a7c8..2483c18416 100644 --- ql/src/test/queries/clientpositive/windowing_filter.q +++ ql/src/test/queries/clientpositive/windowing_filter.q @@ -1,6 +1,5 @@ set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=1431655765; -set hive.optimize.topnkey=false; create table testtable_n1000 (s_state string, ss_net_profit double); diff --git ql/src/test/results/clientpositive/llap/subquery_in.q.out ql/src/test/results/clientpositive/llap/subquery_in.q.out index ea8fe5ea96..d14dbb1145 100644 --- ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -322,13 +322,20 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - null sort order: az + Top N Key Operator sort order: ++ + 
keys: p_mfgr (type: string), p_size (type: int) + null sort order: az Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + top n: 3 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -510,13 +517,20 @@ STAGE PLANS: Filter Operator predicate: p_mfgr is not null (type: boolean) Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - null sort order: az + Top N Key Operator sort order: ++ + keys: p_mfgr (type: string), p_size (type: int) + null sort order: az Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + top n: 3 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -3434,13 +3448,19 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: string) - null sort order: z + Top N Key Operator sort order: + + keys: _col3 (type: string) + null sort order: z Statistics: Num rows: 7 Data size: 4333 Basic stats: 
COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + top n: 4 + Reduce Output Operator + key expressions: _col3 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out index c24b79db86..36cbcb7e1b 100644 --- ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -365,25 +365,39 @@ STAGE PLANS: Filter Operator predicate: p_mfgr is not null (type: boolean) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - null sort order: az + Top N Key Operator sort order: ++ + keys: p_mfgr (type: string), p_size (type: int) + null sort order: az Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: p_name (type: string) + top n: 3 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: p_name (type: string) 
Filter Operator predicate: p_mfgr is not null (type: boolean) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - null sort order: az + Top N Key Operator sort order: ++ + keys: p_mfgr (type: string), p_size (type: int) + null sort order: az Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: p_name (type: string) + top n: 3 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: p_name (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -605,7 +619,7 @@ Manufacturer#4 almond azure aquamarine papaya violet 12 Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[50][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select p_name, p_size from @@ -667,13 +681,20 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - null sort order: az + Top N Key Operator sort order: ++ + keys: p_mfgr (type: string), p_size (type: int) + null sort order: az Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column 
stats: COMPLETE - TopN Hash Memory Usage: 0.1 + top n: 3 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -857,7 +878,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[50][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select p_name, p_size from part where part.p_size not in @@ -969,13 +990,20 @@ STAGE PLANS: Filter Operator predicate: p_mfgr is not null (type: boolean) Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - null sort order: az + Top N Key Operator sort order: ++ + keys: p_mfgr (type: string), p_size (type: int) + null sort order: az Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + top n: 3 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -4512,7 +4540,7 @@ POSTHOOK: Input: default@part 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 
refull 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand, p_partkey limit 4 PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4615,17 +4643,23 @@ STAGE PLANS: Filter Operator predicate: ((_col12 is null or (_col9 = 0L)) and ((_col10 >= _col9) or (_col9 = 0L) or _col12 is not null or _col5 is null) and (_col5 is not null or (_col9 = 0L) or _col12 is not null)) (type: boolean) Statistics: Num rows: 33 Data size: 20987 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: string), _col0 (type: int) - null sort order: zz - sort order: ++ + Top N Key Operator + sort order: ++ + keys: _col3 (type: string), _col0 (type: int) + null sort order: zz + Statistics: Num rows: 33 Data size: 20987 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 33 Data size: 20427 Basic stats: 
COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col3 (type: string), _col0 (type: int) + null sort order: zz + sort order: ++ + Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: @@ -4721,7 +4755,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand, p_partkey limit 4 PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out index 52ba490c01..80aa189ef8 100644 --- ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out +++ ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out @@ -170,6 +170,136 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS 
ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: tw_code (type: string), tw_value (type: double) + null sort order: az + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: tw_code (type: string), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: 
string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT tw_code, ranking FROM ( SELECT tw_code AS tw_code, @@ -406,6 +536,207 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking 
+FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +A 1 +A 1 +A 3 +PREHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT * +FROM (SELECT `tw_code`, RANK() OVER (PARTITION BY 0 ORDER BY `tw_value` ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING) AS `rank_window_0` +FROM `default`.`topnkey_windowing`) AS `t` +WHERE `rank_window_0` <= 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Top N Key Operator + sort order: + + keys: tw_value (type: double) + null sort order: z + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + top n: 4 + Reduce Output Operator + key expressions: 0 (type: int), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + TopN: 4 + TopN 
Hash Memory Usage: 0.1 + value expressions: tw_code (type: string) + auto parallelism: true + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: topnkey_windowing + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.topnkey_windowing + name: default.topnkey_windowing + Truncated Path -> Alias: + /topnkey_windowing 
[topnkey_windowing] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + isSamplingPred: false + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT tw_code, ranking FROM ( SELECT tw_code as tw_code, @@ -502,7 +833,7 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) diff --git ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out index b63bcf47f3..314300f371 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out @@ -232,18 +232,29 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: az + Top N Key Operator sort order: ++ + keys: p_mfgr (type: string), p_name (type: string) + null sort order: az Map-reduce partition columns: p_mfgr (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No PTF TopN IS false Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.8 + top n: 4 + Top N Key Vectorization: + className: VectorTopNKeyOperator + 
keyExpressions: col 2:string, col 1:string + native: true + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + null sort order: az + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No PTF TopN IS false + Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.8 Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -445,18 +456,29 @@ STAGE PLANS: predicateExpression: FilterLongColLessLongScalar(col 0:tinyint, val 5) predicate: (t < 5Y) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: t (type: tinyint), f (type: float) - null sort order: az + Top N Key Operator sort order: ++ + keys: t (type: tinyint), f (type: float) + null sort order: az Map-reduce partition columns: t (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No PTF TopN IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.8 + top n: 6 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:tinyint, col 4:float + native: true + Reduce Output Operator + key expressions: t (type: tinyint), f (type: float) + null sort order: az + sort order: ++ + Map-reduce 
partition columns: t (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No PTF TopN IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.8 Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -683,13 +705,20 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: ctinyint (type: tinyint), cdouble (type: double) - null sort order: az + Top N Key Operator sort order: ++ + keys: ctinyint (type: tinyint), cdouble (type: double) + null sort order: az Map-reduce partition columns: ctinyint (type: tinyint) Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.8 + top n: 5 + Reduce Output Operator + key expressions: ctinyint (type: tinyint), cdouble (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: ctinyint (type: tinyint) + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.8 Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -698,7 +727,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double) outputColumnNames: _col0, _col5 - Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3365908 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -719,17 +748,17 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator 
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3365908 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 < 5) (type: boolean) - Statistics: Num rows: 4096 Data size: 1121976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 1097740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4096 Data size: 40632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 16396 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4096 Data size: 40632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 16396 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -823,18 +852,29 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] - Reduce Output Operator - key expressions: ctinyint (type: tinyint), cdouble (type: double) - null sort order: az + Top N Key Operator sort order: ++ + keys: ctinyint (type: tinyint), cdouble (type: double) + null sort order: az Map-reduce partition columns: ctinyint (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No PTF TopN IS false Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.8 + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:tinyint, col 5:double + native: true + Reduce Output Operator + key expressions: ctinyint (type: tinyint), cdouble (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: ctinyint (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No PTF TopN IS false + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.8 Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -863,7 +903,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double) outputColumnNames: _col0, _col5 - Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3365908 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -884,17 +924,17 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3365908 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 < 5) (type: 
boolean) - Statistics: Num rows: 4096 Data size: 1121976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 1097740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4096 Data size: 40632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 16396 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4096 Data size: 40632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 16396 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -903,7 +943,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: tinyint), _col1 (type: double), _col2 (type: int) outputColumnNames: col1, col2, col3 - Statistics: Num rows: 4096 Data size: 40632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 16396 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/llap/windowing_filter.q.out ql/src/test/results/clientpositive/llap/windowing_filter.q.out index 8ef2261755..5dbb8849c6 100644 --- ql/src/test/results/clientpositive/llap/windowing_filter.q.out +++ ql/src/test/results/clientpositive/llap/windowing_filter.q.out @@ -107,13 +107,20 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 470 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - null sort order: az + Top N Key Operator sort order: +- + keys: _col0 
(type: string), _col1 (type: double) + null sort order: az Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 470 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + top n: 6 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + null sort order: az + sort order: +- + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 470 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/topnkey_windowing.q.out ql/src/test/results/clientpositive/topnkey_windowing.q.out index c186790bea..9f64dcaeea 100644 --- ql/src/test/results/clientpositive/topnkey_windowing.q.out +++ ql/src/test/results/clientpositive/topnkey_windowing.q.out @@ -153,6 +153,120 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +NULL 1 +NULL 1 +NULL 1 +A 1 +A 1 +A 3 +B 1 +B 2 +B 2 +B 2 +PREHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, + rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT tw_code, ranking +FROM ( + SELECT tw_code AS tw_code, 
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: tw_code (type: string), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: tw_code (type: string) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 
Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT tw_code, ranking FROM ( SELECT tw_code AS tw_code, @@ -373,6 +487,192 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +A 1 +A 1 +A 3 +PREHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN extended +SELECT tw_code, ranking +FROM ( + SELECT tw_code as tw_code, + rank() OVER (ORDER BY tw_value) AS ranking + FROM topnkey_windowing) tmp1 + WHERE ranking <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@topnkey_windowing +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT * +FROM (SELECT `tw_code`, RANK() OVER (PARTITION BY 0 ORDER BY `tw_value` ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING) AS `rank_window_0` +FROM `default`.`topnkey_windowing`) AS `t` +WHERE 
`rank_window_0` <= 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: topnkey_windowing + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: 0 (type: int), tw_value (type: double) + null sort order: az + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + TopN: 4 + TopN Hash Memory Usage: 0.1 + value expressions: tw_code (type: string) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: topnkey_windowing + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns 
tw_code,tw_value + columns.comments + columns.types string:double +#### A masked pattern was here #### + name default.topnkey_windowing + numFiles 1 + numRows 26 + rawDataSize 176 + serialization.ddl struct topnkey_windowing { string tw_code, double tw_value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 202 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.topnkey_windowing + name: default.topnkey_windowing + Truncated Path -> Alias: + /topnkey_windowing [$hdt$_0:topnkey_windowing] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + isSamplingPred: false + predicate: (rank_window_0 <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: 
Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT tw_code, ranking FROM ( SELECT tw_code as tw_code, @@ -453,6 +753,7 @@ STAGE PLANS: Map-reduce partition columns: tw_code (type: string) Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double)