diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 353d8db..7fdcd9a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -142,9 +142,10 @@ else if (aggregateType == Group.CUBE) {
         b = ASTBuilder.construct(HiveParser.TOK_GROUPBY, "TOK_GROUPBY");
       }
 
-      for (int i : groupBy.getGroupSet()) {
-        RexInputRef iRef = new RexInputRef(i, groupBy.getCluster().getTypeFactory()
-            .createSqlType(SqlTypeName.ANY));
+      // Group by columns: if the columns are present in the order by, we create them
+      // in that order. This helps us trigger RS deduplication in more cases.
+      List<RexInputRef> iRefs = extractGroupByColumnsInOrder();
+      for (RexInputRef iRef : iRefs) {
         b.add(iRef.accept(new RexVisitor(schema)));
       }
 
@@ -214,6 +215,46 @@ else if (aggregateType == Group.CUBE) {
     return hiveAST.getAST();
   }
 
+  private List<RexInputRef> extractGroupByColumnsInOrder() {
+    // 1) We extract the group by positions that are part of the order by,
+    // preserving the order in which they appear there
+    List<Integer> groupByPositions = new ArrayList<>();
+    if (!groupBy.indicator && orderLimit != null
+        && !orderLimit.getCollation().getFieldCollations().isEmpty()) {
+      Map<Integer, RexNode> obRefToCallMap = ((HiveSortLimit) orderLimit).getInputRefToCallMap();
+      for (RelFieldCollation c : orderLimit.getCollation().getFieldCollations()) {
+        RexNode rexNode = null;
+        if (obRefToCallMap != null) {
+          rexNode = obRefToCallMap.get(c.getFieldIndex());
+        }
+        if (rexNode == null) {
+          rexNode = select.getChildExps().get(c.getFieldIndex());
+        }
+        if (rexNode instanceof RexInputRef) {
+          // Direct reference
+          RexInputRef inputRef = (RexInputRef) rexNode;
+          if (inputRef.getIndex() < groupBy.getGroupCount()) {
+            // Group column found
+            groupByPositions.add(groupBy.getGroupSet().nth(inputRef.getIndex()));
+          }
+        }
+      }
+    }
+
+    // 2) We create the group by column references
+    List<RexInputRef> groupByColumns = new ArrayList<RexInputRef>(groupBy.getGroupSet().cardinality());
+    for (int i : groupByPositions) {
+      groupByColumns.add(new RexInputRef(i, groupBy.getCluster().getTypeFactory()
+          .createSqlType(SqlTypeName.ANY)));
+    }
+    for (int i : groupBy.getGroupSet()) {
+      if (!groupByPositions.contains(i)) {
+        groupByColumns.add(new RexInputRef(i, groupBy.getCluster().getTypeFactory()
+            .createSqlType(SqlTypeName.ANY)));
+      }
+    }
+    return groupByColumns;
+  }
+
   private void convertOrderLimitToASTNode(HiveSortLimit order) {
     if (order != null) {
       HiveSortLimit hiveSortLimit = order;
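Reviewer note (not part of the patch): the reordering that extractGroupByColumnsInOrder() performs can be illustrated in isolation. The sketch below is hypothetical code, not Hive's — the class and method names are invented for the example — and it only shows the ordering policy: group-by columns referenced by the order-by come first, in order-by order, followed by the remaining group-by columns in their original order.

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;

// Hypothetical, self-contained illustration of the ordering policy above.
public class GroupByReorderSketch {

  static List<Integer> reorder(List<Integer> groupByCols, List<Integer> orderByCols) {
    // LinkedHashSet keeps first-insertion order and silently drops duplicates.
    LinkedHashSet<Integer> result = new LinkedHashSet<>();
    // 1) Group-by columns that also appear in the order-by, in order-by order.
    for (Integer c : orderByCols) {
      if (groupByCols.contains(c)) {
        result.add(c);
      }
    }
    // 2) The remaining group-by columns, in their original order.
    result.addAll(groupByCols);
    return new ArrayList<>(result);
  }

  public static void main(String[] args) {
    // GROUP BY value, key (positions 1, 0) with ORDER BY key, value (0, 1):
    // the group-by keys are emitted as [0, 1], so the group-by ReduceSink and
    // the order-by ReduceSink share a key prefix and can be deduplicated.
    System.out.println(reorder(Arrays.asList(1, 0), Arrays.asList(0, 1))); // [0, 1]
  }
}
```
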
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
index 77771c3..c2500c8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
@@ -205,7 +205,7 @@ protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReduc
       return false;
     }
 
-    Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRSNc.getOrder(),
+    Integer moveRSOrderTo = checkOrder(true, cRSc.getOrder(), pRSNc.getOrder(),
         cRSc.getNullOrder(), pRSNc.getNullOrder());
     if (moveRSOrderTo == null) {
       return false;
     }
@@ -304,6 +311,13 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
         }
         pRS.getConf().setOrder(cRS.getConf().getOrder());
         pRS.getConf().setNullOrder(cRS.getConf().getNullOrder());
+      } else {
+        StringBuilder order = new StringBuilder(cRS.getConf().getOrder());
+        StringBuilder orderNull = new StringBuilder(cRS.getConf().getNullOrder());
+        order.append(pRS.getConf().getOrder().substring(order.length()));
+        orderNull.append(pRS.getConf().getNullOrder().substring(orderNull.length()));
+        pRS.getConf().setOrder(order.toString());
+        pRS.getConf().setNullOrder(orderNull.toString());
       }
 
       if (result[3] > 0) {
@@ -342,7 +349,7 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
       throws SemanticException {
     ReduceSinkDesc cConf = cRS.getConf();
     ReduceSinkDesc pConf = pRS.getConf();
-    Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder(),
+    Integer moveRSOrderTo = checkOrder(false, cConf.getOrder(), pConf.getOrder(),
         cConf.getNullOrder(), pConf.getNullOrder());
     if (moveRSOrderTo == null) {
       return null;
     }
@@ -452,8 +459,7 @@ protected Integer sameKeys(List<ExprNodeDesc> cexprs, List<ExprNodeDesc> pexprs,
     return Integer.valueOf(cexprs.size()).compareTo(pexprs.size());
   }
 
-  // order of overlapping keys should be exactly the same
-  protected Integer checkOrder(String corder, String porder,
+  protected Integer checkOrder(boolean checkStrictEquality, String corder, String porder,
       String cNullOrder, String pNullOrder) {
     assert corder.length() == cNullOrder.length();
     assert porder.length() == pNullOrder.length();
@@ -468,12 +474,15 @@ protected Integer checkOrder(String corder, String porder,
     }
     corder = corder.trim();
     porder = porder.trim();
-    cNullOrder = cNullOrder.trim();
-    pNullOrder = pNullOrder.trim();
-    int target = Math.min(corder.length(), porder.length());
-    if (!corder.substring(0, target).equals(porder.substring(0, target)) ||
-        !cNullOrder.substring(0, target).equals(pNullOrder.substring(0, target))) {
-      return null;
+    if (checkStrictEquality) {
+      // order of overlapping keys should be exactly the same
+      cNullOrder = cNullOrder.trim();
+      pNullOrder = pNullOrder.trim();
+      int target = Math.min(corder.length(), porder.length());
+      if (!corder.substring(0, target).equals(porder.substring(0, target)) ||
+          !cNullOrder.substring(0, target).equals(pNullOrder.substring(0, target))) {
+        return null;
+      }
     }
     return Integer.valueOf(corder.length()).compareTo(porder.length());
  }
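Reviewer note (not part of the patch): checkOrder now enforces identical overlapping sort/null-sort directions only when checkStrictEquality is true (the join path); on the ReduceSink/ReduceSink path the directions may differ, and the else branch added to merge() above assembles the merged order strings by keeping the child's directions and appending the parent's tail. A hedged, standalone sketch of that string assembly follows; the names are invented for the example and this is not the actual Hive operator code.

```java
// Hypothetical illustration of the merged order-string assembly in the new
// else branch of merge().
public class OrderMergeSketch {

  // The child's directions win for the keys the child sorts; the parent's
  // directions are kept for its remaining trailing keys. Assumes the child's
  // keys form a prefix of the parent's, which merge() has already verified.
  static String mergeOrders(String childOrder, String parentOrder) {
    return childOrder + parentOrder.substring(childOrder.length());
  }

  public static void main(String[] args) {
    // ORDER BY key DESC, value yields a child RS with order "-+"; the
    // GROUP BY yields a parent RS with order "++" on the same keys.
    // Under the old strict check this pair could not be merged; now it can,
    // and the surviving RS sorts "-+", preserving the ORDER BY semantics.
    System.out.println(mergeOrders("-+", "++")); // prints -+
  }
}
```
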
diff --git ql/src/test/queries/clientpositive/limit_pushdown2.q ql/src/test/queries/clientpositive/limit_pushdown2.q
new file mode 100644
index 0000000..637b5b0
--- /dev/null
+++ ql/src/test/queries/clientpositive/limit_pushdown2.q
@@ -0,0 +1,78 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.limit.pushdown.memory.usage=0.3f;
+set hive.optimize.reducededuplication.min.reducer=1;
+
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20;
+
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20;
+
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20;
+
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20;
+
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20;
+
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20;
+
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20;
+
+-- NOT APPLICABLE
+explain
+select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20;
+
+select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20;
+
+-- NOT APPLICABLE
+explain
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20;
diff --git ql/src/test/results/clientpositive/bucket_groupby.q.out ql/src/test/results/clientpositive/bucket_groupby.q.out
index e198617..e6ee3e4 100644
--- ql/src/test/results/clientpositive/bucket_groupby.q.out
+++ ql/src/test/results/clientpositive/bucket_groupby.q.out
@@ -1540,12 +1540,12 @@ STAGE PLANS:
            alias: clustergroupby
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Select Operator
-              expressions: value (type: string), key (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col1, _col0
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(1)
-                keys: _col0 (type: string), _col1 (type: string)
+                keys: _col1 (type: string), _col0 (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1563,7 +1563,7 @@ STAGE PLANS:
        outputColumnNames: _col0, _col1, _col2
        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
        Select Operator
-          expressions: _col1 (type: string), _col2 (type: bigint)
+          expressions: _col0 (type: string), _col2 (type: bigint)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
diff --git ql/src/test/results/clientpositive/limit_pushdown2.q.out ql/src/test/results/clientpositive/limit_pushdown2.q.out
new file mode 100644
index 0000000..2f68674
--- /dev/null
+++ ql/src/test/results/clientpositive/limit_pushdown2.q.out
@@ -0,0 +1,804 @@
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: avg(_col2)
+                keys: _col0 (type: string), _col1 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+
key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 1.0 +10 val_10 11.0 +100 val_100 101.0 +103 val_103 104.0 +104 val_104 105.0 +105 val_105 106.0 +11 val_11 12.0 +111 val_111 112.0 +113 val_113 114.0 +114 val_114 115.0 +116 val_116 117.0 +118 val_118 119.0 +119 val_119 120.0 +12 val_12 13.0 +120 val_120 121.0 +125 val_125 126.0 +126 val_126 127.0 +128 val_128 129.0 +129 val_129 130.0 +131 val_131 132.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: +- + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of 
rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 1.0 +10 val_10 11.0 +100 val_100 101.0 +103 val_103 104.0 +104 val_104 105.0 +105 val_105 106.0 +11 val_11 12.0 +111 val_111 112.0 +113 val_113 114.0 +114 val_114 115.0 +116 val_116 117.0 +118 val_118 119.0 +119 val_119 120.0 +12 val_12 13.0 +120 val_120 121.0 +125 val_125 126.0 +126 val_126 127.0 +128 val_128 129.0 +129 val_129 130.0 +131 val_131 132.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: -+ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key desc, 
value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 99.0 +97 val_97 98.0 +96 val_96 97.0 +95 val_95 96.0 +92 val_92 93.0 +90 val_90 91.0 +9 val_9 10.0 +87 val_87 88.0 +86 val_86 87.0 +85 val_85 86.0 +84 val_84 85.0 +83 val_83 84.0 +82 val_82 83.0 +80 val_80 81.0 +8 val_8 9.0 +78 val_78 79.0 +77 val_77 78.0 +76 val_76 77.0 +74 val_74 75.0 +72 val_72 73.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 1.0 +10 val_10 11.0 +100 val_100 101.0 +103 val_103 104.0 +104 val_104 105.0 +105 val_105 106.0 +11 val_11 12.0 +111 val_111 112.0 +113 val_113 114.0 +114 val_114 115.0 +116 val_116 117.0 +118 val_118 119.0 +119 val_119 120.0 +12 val_12 13.0 +120 val_120 121.0 +125 val_125 126.0 +126 val_126 127.0 +128 val_128 129.0 +129 val_129 130.0 +131 val_131 132.0 
+PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: -+ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 99.0 +97 val_97 98.0 +96 val_96 97.0 +95 val_95 96.0 +92 val_92 93.0 +90 val_90 91.0 +9 val_9 10.0 +87 val_87 88.0 +86 val_86 87.0 +85 val_85 86.0 +84 val_84 85.0 +83 val_83 84.0 +82 val_82 83.0 +80 val_80 81.0 +8 val_8 9.0 +78 val_78 79.0 +77 val_77 78.0 +76 val_76 77.0 +74 val_74 75.0 +72 val_72 73.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: -+ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 99.0 +97 val_97 98.0 +96 val_96 97.0 +95 val_95 96.0 +92 val_92 93.0 +90 val_90 91.0 +9 val_9 10.0 +87 val_87 88.0 +86 val_86 87.0 +85 val_85 86.0 +84 val_84 85.0 +83 val_83 84.0 +82 val_82 83.0 +80 val_80 81.0 +8 val_8 9.0 +78 val_78 79.0 +77 val_77 78.0 +76 val_76 77.0 +74 val_74 75.0 +72 val_72 73.0 +PREHOOK: query: -- NOT APPLICABLE +explain +select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- NOT APPLICABLE +explain +select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group 
By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: double), _col0 (type: string) + sort order: +- + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_0 1.0 +val_2 3.0 +val_4 5.0 +val_5 6.0 +val_8 9.0 +val_9 10.0 +val_10 11.0 +val_11 12.0 +val_12 13.0 +val_15 16.0 +val_17 18.0 +val_18 19.0 +val_19 20.0 +val_20 21.0 +val_24 25.0 +val_26 27.0 +val_27 28.0 +val_28 29.0 +val_30 31.0 +val_33 34.0 +PREHOOK: query: -- NOT APPLICABLE +explain +select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- NOT APPLICABLE +explain +select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: 
string) + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL NULL 261.182 +NULL val_0 1.0 +NULL val_10 11.0 +NULL val_100 101.0 +NULL val_103 104.0 +NULL val_104 105.0 +NULL val_105 106.0 +NULL val_11 12.0 +NULL val_111 112.0 +NULL val_113 114.0 +NULL val_114 115.0 +NULL val_116 117.0 +NULL val_118 119.0 +NULL val_119 120.0 +NULL val_12 13.0 +NULL val_120 121.0 +NULL val_125 126.0 +NULL val_126 127.0 +NULL val_128 129.0 +NULL val_129 130.0 diff --git ql/src/test/results/clientpositive/lineage3.q.out ql/src/test/results/clientpositive/lineage3.q.out index 12ae13e..a769022 100644 --- ql/src/test/results/clientpositive/lineage3.q.out +++ ql/src/test/results/clientpositive/lineage3.q.out @@ -317,7 +317,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Input: default@dest_v3 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","database":"default","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. 
(tok_table_or_col $hdt$_0) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) csmallint)) (tok_orderby (tok_tabsortcolnameasc (tok_nulls_first (. (tok_table_or_col $hdt$_0) csmallint))))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8,7],"targets":[0,1,2],"expression":"((a.cboolean2 = true) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[9,7],"targets":[0,1,2],"expression":"((a.cfloat > 0.0) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) csmallint)) (tok_orderby (tok_tabsortcolnameasc (tok_nulls_first (. 
(tok_table_or_col $hdt$_0) csmallint))))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[3],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8,7],"targets":[0,1,2],"expression":"((a.cboolean2 = true) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[9,7],"targets":[0,1,2],"expression":"((a.cfloat > 0.0) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]} 38 216 false 38 229 true PREHOOK: query: drop table if exists src_dp diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index bc23b36..760129d 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -785,7 +785,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -820,20 +821,16 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) - keys: _col0 (type: int), _col1 (type: int) - mode: complete + keys: _col1 (type: int), _col0 (type: int) + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -842,6 +839,30 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out index 6d4f13a..d809808 100644 --- ql/src/test/results/clientpositive/vectorization_14.q.out +++ ql/src/test/results/clientpositive/vectorization_14.q.out @@ -89,26 +89,26 @@ STAGE PLANS: Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) - keys: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ - Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) - keys: KEY._col0 (type: timestamp), KEY._col1 (type: float), KEY._col2 (type: string), KEY._col3 (type: boolean), KEY._col4 (type: double) + keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: 
double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col4)) (type: double) + expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE File Output Operator