diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 353d8db..6d26eee 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -142,9 +142,10 @@ else if (aggregateType == Group.CUBE) {
         b = ASTBuilder.construct(HiveParser.TOK_GROUPBY, "TOK_GROUPBY");
       }
 
-      for (int i : groupBy.getGroupSet()) {
-        RexInputRef iRef = new RexInputRef(i, groupBy.getCluster().getTypeFactory()
-            .createSqlType(SqlTypeName.ANY));
+      // Group by columns: if the columns are present in the order by, we create them
+      // in that order. This helps us trigger RS deduplication in more cases.
+      List<RexInputRef> iRefs = extractGroupByColumnsInOrder();
+      for (RexInputRef iRef : iRefs) {
         b.add(iRef.accept(new RexVisitor(schema)));
       }
 
@@ -214,6 +215,39 @@ else if (aggregateType == Group.CUBE) {
     return hiveAST.getAST();
   }
 
+  private List<RexInputRef> extractGroupByColumnsInOrder() {
+    // 1) We extract the group by positions that are part of the order by, in its order
+    List<Integer> groupByPositions = new ArrayList<>();
+    if (!groupBy.indicator && orderLimit != null
+        && !orderLimit.getCollation().getFieldCollations().isEmpty()) {
+      for (RelFieldCollation c : orderLimit.getCollation().getFieldCollations()) {
+        RexNode rexNode = select.getChildExps().get(c.getFieldIndex());
+        if (rexNode instanceof RexInputRef) {
+          // Direct reference
+          RexInputRef inputRef = (RexInputRef) rexNode;
+          if (inputRef.getIndex() < groupBy.getGroupCount()) {
+            // Group column found
+            groupByPositions.add(groupBy.getGroupSet().nth(inputRef.getIndex()));
+          }
+        }
+      }
+    }
+
+    // 2) We create the group by column references
+    List<RexInputRef> groupByColumns = new ArrayList<>(groupBy.getGroupSet().cardinality());
+    for (int i : groupByPositions) {
+      groupByColumns.add(new RexInputRef(i, groupBy.getCluster().getTypeFactory()
+          .createSqlType(SqlTypeName.ANY)));
+    }
+    for (int i : groupBy.getGroupSet()) {
+      if (!groupByPositions.contains(i)) {
+        groupByColumns.add(new RexInputRef(i, groupBy.getCluster().getTypeFactory()
+            .createSqlType(SqlTypeName.ANY)));
+      }
+    }
+    return groupByColumns;
+  }
+
   private void convertOrderLimitToASTNode(HiveSortLimit order) {
     if (order != null) {
       HiveSortLimit hiveSortLimit = order;
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
index 77771c3..7830a05 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
@@ -205,7 +205,7 @@ protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReduc
         return false;
       }
-      Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRSNc.getOrder(),
+      Integer moveRSOrderTo = checkOrder(true, cRSc.getOrder(), pRSNc.getOrder(),
           cRSc.getNullOrder(), pRSNc.getNullOrder());
       if (moveRSOrderTo == null) {
         return false;
       }
@@ -304,6 +304,16 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
         }
         pRS.getConf().setOrder(cRS.getConf().getOrder());
         pRS.getConf().setNullOrder(cRS.getConf().getNullOrder());
+      } else {
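+        // The child RS sorts on a prefix of the parent RS keys, possibly with
+        // different sort directions: keep the child's ordering for that common
+        // prefix and extend it with the parent's remaining (null) ordering.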
+        StringBuilder order = new StringBuilder(cRS.getConf().getOrder());
+        StringBuilder orderNull = new StringBuilder(cRS.getConf().getNullOrder());
+        order.append(pRS.getConf().getOrder().substring(order.length()));
+        orderNull.append(pRS.getConf().getNullOrder().substring(orderNull.length()));
+        pRS.getConf().setOrder(order.toString());
+        pRS.getConf().setNullOrder(orderNull.toString());
       }
 
       if (result[3] > 0) {
@@ -342,7 +349,7 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
         throws SemanticException {
       ReduceSinkDesc cConf = cRS.getConf();
       ReduceSinkDesc pConf = pRS.getConf();
-      Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder(),
+      Integer moveRSOrderTo = checkOrder(false, cConf.getOrder(), pConf.getOrder(),
           cConf.getNullOrder(), pConf.getNullOrder());
       if (moveRSOrderTo == null) {
         return null;
       }
@@ -452,8 +459,7 @@ protected Integer sameKeys(List<ExprNodeDesc> cexprs, List<ExprNodeDesc> pexprs
       return Integer.valueOf(cexprs.size()).compareTo(pexprs.size());
     }
 
-    // order of overlapping keys should be exactly the same
-    protected Integer checkOrder(String corder, String porder,
+    protected Integer checkOrder(boolean checkStrictEquality, String corder, String porder,
         String cNullOrder, String pNullOrder) {
       assert corder.length() == cNullOrder.length();
       assert porder.length() == pNullOrder.length();
@@ -468,12 +474,15 @@ protected Integer checkOrder(String corder, String porder,
       }
       corder = corder.trim();
       porder = porder.trim();
-      cNullOrder = cNullOrder.trim();
-      pNullOrder = pNullOrder.trim();
-      int target = Math.min(corder.length(), porder.length());
-      if (!corder.substring(0, target).equals(porder.substring(0, target)) ||
-          !cNullOrder.substring(0, target).equals(pNullOrder.substring(0, target))) {
-        return null;
+      if (checkStrictEquality) {
+        // order of overlapping keys should be exactly the same
+        cNullOrder = cNullOrder.trim();
+        pNullOrder = pNullOrder.trim();
+        int target = Math.min(corder.length(), porder.length());
+        if (!corder.substring(0, target).equals(porder.substring(0, target)) ||
+            !cNullOrder.substring(0, target).equals(pNullOrder.substring(0, target))) {
+          return null;
+        }
       }
       return Integer.valueOf(corder.length()).compareTo(porder.length());
     }
diff --git ql/src/test/queries/clientpositive/limit_pushdown2.q ql/src/test/queries/clientpositive/limit_pushdown2.q
new file mode 100644
index 0000000..f650405
--- /dev/null
+++ ql/src/test/queries/clientpositive/limit_pushdown2.q
@@ -0,0 +1,67 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.limit.pushdown.memory.usage=0.3f;
+set hive.optimize.reducededuplication.min.reducer=1;
+
+-- DONE
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20;
+
+-- DONE
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20;
+
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20;
+
+-- DONE
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20;
+
+-- DONE
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20;
+
+-- DONE
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20;
+
+-- NOT DONE
+explain
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20;
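+
+-- The rollup case above is NOT DONE: the grouping-set id becomes part of the
+-- group by key, so the order by RS is not merged and a second stage remains.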
diff --git ql/src/test/results/clientpositive/limit_pushdown2.q.out ql/src/test/results/clientpositive/limit_pushdown2.q.out
new file mode 100644
index 0000000..73317ac
--- /dev/null
+++ ql/src/test/results/clientpositive/limit_pushdown2.q.out
@@ -0,0 +1,603 @@
+PREHOOK: query: -- DONE
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- DONE
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: avg(_col2)
+                keys: _col0 (type: string), _col1 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+                  value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	1.0
+10	val_10	11.0
+100	val_100	101.0
+103	val_103	104.0
+104	val_104	105.0
+105	val_105	106.0
+11	val_11	12.0
+111	val_111	112.0
+113	val_113	114.0
+114	val_114	115.0
+116	val_116	117.0
+118	val_118	119.0
+119	val_119	120.0
+12	val_12	13.0
+120	val_120	121.0
+125	val_125	126.0
+126	val_126	127.0
+128	val_128	129.0
+129	val_129	130.0
+131	val_131	132.0
+PREHOOK: query: -- DONE
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- DONE
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: avg(_col2)
+                keys: _col0 (type: string), _col1 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: +-
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+                  value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	1.0
+10	val_10	11.0
+100	val_100	101.0
+103	val_103	104.0
+104	val_104	105.0
+105	val_105	106.0
+11	val_11	12.0
+111	val_111	112.0
+113	val_113	114.0
+114	val_114	115.0
+116	val_116	117.0
+118	val_118	119.0
+119	val_119	120.0
+12	val_12	13.0
+120	val_120	121.0
+125	val_125	126.0
+126	val_126	127.0
+128	val_128	129.0
+129	val_129	130.0
+131	val_131	132.0
+PREHOOK: query: -- DONE
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- DONE
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: avg(_col2)
+                keys: _col0 (type: string), _col1 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: -+
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+                  value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98	val_98	99.0
+97	val_97	98.0
+96	val_96	97.0
+95	val_95	96.0
+92	val_92	93.0
+90	val_90	91.0
+9	val_9	10.0
+87	val_87	88.0
+86	val_86	87.0
+85	val_85	86.0
+84	val_84	85.0
+83	val_83	84.0
+82	val_82	83.0
+80	val_80	81.0
+8	val_8	9.0
+78	val_78	79.0
+77	val_77	78.0
+76	val_76	77.0
+74	val_74	75.0
+72	val_72	73.0
+PREHOOK: query: -- DONE
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- DONE
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: avg(_col2)
+                keys: _col1 (type: string), _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+                  value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	1.0
+10	val_10	11.0
+100	val_100	101.0
+103	val_103	104.0
+104	val_104	105.0
+105	val_105	106.0
+11	val_11	12.0
+111	val_111	112.0
+113	val_113	114.0
+114	val_114	115.0
+116	val_116	117.0
+118	val_118	119.0
+119	val_119	120.0
+12	val_12	13.0
+120	val_120	121.0
+125	val_125	126.0
+126	val_126	127.0
+128	val_128	129.0
+129	val_129	130.0
+131	val_131	132.0
+PREHOOK: query: -- DONE
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- DONE
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: avg(_col2)
+                keys: _col1 (type: string), _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: -+
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+                  value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98	val_98	99.0
+97	val_97	98.0
+96	val_96	97.0
+95	val_95	96.0
+92	val_92	93.0
+90	val_90	91.0
+9	val_9	10.0
+87	val_87	88.0
+86	val_86	87.0
+85	val_85	86.0
+84	val_84	85.0
+83	val_83	84.0
+82	val_82	83.0
+80	val_80	81.0
+8	val_8	9.0
+78	val_78	79.0
+77	val_77	78.0
+76	val_76	77.0
+74	val_74	75.0
+72	val_72	73.0
+PREHOOK: query: -- NOT DONE
+explain
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NOT DONE
+explain
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: avg(_col2)
+                keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col3 (type: struct<count:bigint,sum:double,input:double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col3
+          Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+          pruneGroupingSetId: true
+          Select Operator
+            expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+              value expressions: _col2 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL	NULL	261.182
+NULL	val_0	1.0
+NULL	val_10	11.0
+NULL	val_100	101.0
+NULL	val_103	104.0
+NULL	val_104	105.0
+NULL	val_105	106.0
+NULL	val_11	12.0
+NULL	val_111	112.0
+NULL	val_113	114.0
+NULL	val_114	115.0
+NULL	val_116	117.0
+NULL	val_118	119.0
+NULL	val_119	120.0
+NULL	val_12	13.0
+NULL	val_120	121.0
+NULL	val_125	126.0
+NULL	val_126	127.0
+NULL	val_128	129.0
+NULL	val_129	130.0