diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index 353d8db..7fdcd9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -142,9 +142,10 @@ else if (aggregateType == Group.CUBE) { b = ASTBuilder.construct(HiveParser.TOK_GROUPBY, "TOK_GROUPBY"); } - for (int i : groupBy.getGroupSet()) { - RexInputRef iRef = new RexInputRef(i, groupBy.getCluster().getTypeFactory() - .createSqlType(SqlTypeName.ANY)); + // Group by columns: if the columns are present in the order by, we create them + // in that order. This help us triggering RS deduplication for more cases. + List iRefs = extractGroupByColumnsInOrder(); + for (RexInputRef iRef : iRefs) { b.add(iRef.accept(new RexVisitor(schema))); } @@ -214,6 +215,46 @@ else if (aggregateType == Group.CUBE) { return hiveAST.getAST(); } + private List extractGroupByColumnsInOrder() { + // 1) We extract the group by positions that are part of the order by and its order + List groupByPositions = new ArrayList<>(); + if (!groupBy.indicator && orderLimit != null + && !orderLimit.getCollation().getFieldCollations().isEmpty()) { + Map obRefToCallMap = ((HiveSortLimit) orderLimit).getInputRefToCallMap(); + for (RelFieldCollation c : orderLimit.getCollation().getFieldCollations()) { + RexNode rexNode = null; + if (obRefToCallMap != null) { + rexNode = obRefToCallMap.get(c.getFieldIndex()); + } + if (rexNode == null) { + rexNode = select.getChildExps().get(c.getFieldIndex()); + } + if (rexNode instanceof RexInputRef) { + // Direct reference + RexInputRef inputRef = (RexInputRef) rexNode; + if (inputRef.getIndex() < groupBy.getGroupCount()) { + // Group column found + groupByPositions.add(groupBy.getGroupSet().nth(inputRef.getIndex())); + } + } + } + } + + // 2) We create the group by column references + List groupByColumns = new ArrayList(groupBy.getGroupSet().cardinality()); + for (int i : groupByPositions) { + groupByColumns.add(new RexInputRef(i, groupBy.getCluster().getTypeFactory() + .createSqlType(SqlTypeName.ANY))); + } + for (int i : groupBy.getGroupSet()) { + if (!groupByPositions.contains(i)) { + groupByColumns.add(new RexInputRef(i, groupBy.getCluster().getTypeFactory() + .createSqlType(SqlTypeName.ANY))); + } + } + return groupByColumns; + } + private void convertOrderLimitToASTNode(HiveSortLimit order) { if (order != null) { HiveSortLimit hiveSortLimit = order; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index 77771c3..c2500c8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -205,7 +205,7 @@ protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReduc return false; } - Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRSNc.getOrder(), + Integer moveRSOrderTo = checkOrder(true, cRSc.getOrder(), pRSNc.getOrder(), cRSc.getNullOrder(), pRSNc.getNullOrder()); if (moveRSOrderTo == null) { return false; @@ -304,6 +304,13 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR } pRS.getConf().setOrder(cRS.getConf().getOrder()); pRS.getConf().setNullOrder(cRS.getConf().getNullOrder()); + } else { + StringBuilder order = new StringBuilder(cRS.getConf().getOrder()); + StringBuilder orderNull = new StringBuilder(cRS.getConf().getNullOrder()); + order.append(pRS.getConf().getOrder().substring(order.length())); + orderNull.append(pRS.getConf().getNullOrder().substring(orderNull.length())); + pRS.getConf().setOrder(order.toString()); + pRS.getConf().setNullOrder(orderNull.toString()); } if (result[3] > 0) { @@ -342,7 +349,7 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR throws SemanticException { ReduceSinkDesc cConf = cRS.getConf(); ReduceSinkDesc pConf = pRS.getConf(); - Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder(), + Integer moveRSOrderTo = checkOrder(false, cConf.getOrder(), pConf.getOrder(), cConf.getNullOrder(), pConf.getNullOrder()); if (moveRSOrderTo == null) { return null; @@ -452,8 +459,7 @@ protected Integer sameKeys(List cexprs, List pexprs, return Integer.valueOf(cexprs.size()).compareTo(pexprs.size()); } - // order of overlapping keys should be exactly the same - protected Integer checkOrder(String corder, String porder, + protected Integer checkOrder(boolean checkStrictEquality, String corder, String porder, String cNullOrder, String pNullOrder) { assert corder.length() == cNullOrder.length(); assert porder.length() == pNullOrder.length(); @@ -468,12 +474,15 @@ protected Integer checkOrder(String corder, String porder, } corder = corder.trim(); porder = porder.trim(); - cNullOrder = cNullOrder.trim(); - pNullOrder = pNullOrder.trim(); - int target = Math.min(corder.length(), porder.length()); - if (!corder.substring(0, target).equals(porder.substring(0, target)) || - !cNullOrder.substring(0, target).equals(pNullOrder.substring(0, target))) { - return null; + if (checkStrictEquality) { + // order of overlapping keys should be exactly the same + cNullOrder = cNullOrder.trim(); + pNullOrder = pNullOrder.trim(); + int target = Math.min(corder.length(), porder.length()); + if (!corder.substring(0, target).equals(porder.substring(0, target)) || + !cNullOrder.substring(0, target).equals(pNullOrder.substring(0, target))) { + return null; + } } return Integer.valueOf(corder.length()).compareTo(porder.length()); } diff --git ql/src/test/queries/clientpositive/limit_pushdown2.q ql/src/test/queries/clientpositive/limit_pushdown2.q new file mode 100644 index 0000000..637b5b0 --- /dev/null +++ ql/src/test/queries/clientpositive/limit_pushdown2.q @@ -0,0 +1,78 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.limit.pushdown.memory.usage=0.3f; +set hive.optimize.reducededuplication.min.reducer=1; + +explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20; + +select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20; + +explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20; + +select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20; + +explain +select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20; + +select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20; + +explain +select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20; + +select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20; + +explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20; + +select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20; + +explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20; + +select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20; + +-- NOT APPLICABLE +explain +select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20; + +select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20; + +-- NOT APPLICABLE +explain +select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20; + +select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20; diff --git ql/src/test/results/clientpositive/bucket_groupby.q.out ql/src/test/results/clientpositive/bucket_groupby.q.out index e198617..e6ee3e4 100644 --- ql/src/test/results/clientpositive/bucket_groupby.q.out +++ ql/src/test/results/clientpositive/bucket_groupby.q.out @@ -1540,12 +1540,12 @@ STAGE PLANS: alias: clustergroupby Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) + keys: _col1 (type: string), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1563,7 +1563,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: bigint) + expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/limit_pushdown2.q.out ql/src/test/results/clientpositive/limit_pushdown2.q.out new file mode 100644 index 0000000..2f68674 --- /dev/null +++ ql/src/test/results/clientpositive/limit_pushdown2.q.out @@ -0,0 +1,804 @@ +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 1.0 +10 val_10 11.0 +100 val_100 101.0 +103 val_103 104.0 +104 val_104 105.0 +105 val_105 106.0 +11 val_11 12.0 +111 val_111 112.0 +113 val_113 114.0 +114 val_114 115.0 +116 val_116 117.0 +118 val_118 119.0 +119 val_119 120.0 +12 val_12 13.0 +120 val_120 121.0 +125 val_125 126.0 +126 val_126 127.0 +128 val_128 129.0 +129 val_129 130.0 +131 val_131 132.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: +- + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 1.0 +10 val_10 11.0 +100 val_100 101.0 +103 val_103 104.0 +104 val_104 105.0 +105 val_105 106.0 +11 val_11 12.0 +111 val_111 112.0 +113 val_113 114.0 +114 val_114 115.0 +116 val_116 117.0 +118 val_118 119.0 +119 val_119 120.0 +12 val_12 13.0 +120 val_120 121.0 +125 val_125 126.0 +126 val_126 127.0 +128 val_128 129.0 +129 val_129 130.0 +131 val_131 132.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: -+ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 99.0 +97 val_97 98.0 +96 val_96 97.0 +95 val_95 96.0 +92 val_92 93.0 +90 val_90 91.0 +9 val_9 10.0 +87 val_87 88.0 +86 val_86 87.0 +85 val_85 86.0 +84 val_84 85.0 +83 val_83 84.0 +82 val_82 83.0 +80 val_80 81.0 +8 val_8 9.0 +78 val_78 79.0 +77 val_77 78.0 +76 val_76 77.0 +74 val_74 75.0 +72 val_72 73.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 1.0 +10 val_10 11.0 +100 val_100 101.0 +103 val_103 104.0 +104 val_104 105.0 +105 val_105 106.0 +11 val_11 12.0 +111 val_111 112.0 +113 val_113 114.0 +114 val_114 115.0 +116 val_116 117.0 +118 val_118 119.0 +119 val_119 120.0 +12 val_12 13.0 +120 val_120 121.0 +125 val_125 126.0 +126 val_126 127.0 +128 val_128 129.0 +129 val_129 130.0 +131 val_131 132.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: -+ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 99.0 +97 val_97 98.0 +96 val_96 97.0 +95 val_95 96.0 +92 val_92 93.0 +90 val_90 91.0 +9 val_9 10.0 +87 val_87 88.0 +86 val_86 87.0 +85 val_85 86.0 +84 val_84 85.0 +83 val_83 84.0 +82 val_82 83.0 +80 val_80 81.0 +8 val_8 9.0 +78 val_78 79.0 +77 val_77 78.0 +76 val_76 77.0 +74 val_74 75.0 +72 val_72 73.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: -+ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 99.0 +97 val_97 98.0 +96 val_96 97.0 +95 val_95 96.0 +92 val_92 93.0 +90 val_90 91.0 +9 val_9 10.0 +87 val_87 88.0 +86 val_86 87.0 +85 val_85 86.0 +84 val_84 85.0 +83 val_83 84.0 +82 val_82 83.0 +80 val_80 81.0 +8 val_8 9.0 +78 val_78 79.0 +77 val_77 78.0 +76 val_76 77.0 +74 val_74 75.0 +72 val_72 73.0 +PREHOOK: query: -- NOT APPLICABLE +explain +select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- NOT APPLICABLE +explain +select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: double), _col0 (type: string) + sort order: +- + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_0 1.0 +val_2 3.0 +val_4 5.0 +val_5 6.0 +val_8 9.0 +val_9 10.0 +val_10 11.0 +val_11 12.0 +val_12 13.0 +val_15 16.0 +val_17 18.0 +val_18 19.0 +val_19 20.0 +val_20 21.0 +val_24 25.0 +val_26 27.0 +val_27 28.0 +val_28 29.0 +val_30 31.0 +val_33 34.0 +PREHOOK: query: -- NOT APPLICABLE +explain +select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- NOT APPLICABLE +explain +select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL NULL 261.182 +NULL val_0 1.0 +NULL val_10 11.0 +NULL val_100 101.0 +NULL val_103 104.0 +NULL val_104 105.0 +NULL val_105 106.0 +NULL val_11 12.0 +NULL val_111 112.0 +NULL val_113 114.0 +NULL val_114 115.0 +NULL val_116 117.0 +NULL val_118 119.0 +NULL val_119 120.0 +NULL val_12 13.0 +NULL val_120 121.0 +NULL val_125 126.0 +NULL val_126 127.0 +NULL val_128 129.0 +NULL val_129 130.0 diff --git ql/src/test/results/clientpositive/lineage3.q.out ql/src/test/results/clientpositive/lineage3.q.out index 12ae13e..a769022 100644 --- ql/src/test/results/clientpositive/lineage3.q.out +++ ql/src/test/results/clientpositive/lineage3.q.out @@ -317,7 +317,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Input: default@dest_v3 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","database":"default","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) csmallint)) (tok_orderby (tok_tabsortcolnameasc (tok_nulls_first (. (tok_table_or_col $hdt$_0) csmallint))))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8,7],"targets":[0,1,2],"expression":"((a.cboolean2 = true) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[9,7],"targets":[0,1,2],"expression":"((a.cfloat > 0.0) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) csmallint)) (tok_orderby (tok_tabsortcolnameasc (tok_nulls_first (. (tok_table_or_col $hdt$_0) csmallint))))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[3],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8,7],"targets":[0,1,2],"expression":"((a.cboolean2 = true) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[9,7],"targets":[0,1,2],"expression":"((a.cfloat > 0.0) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]} 38 216 false 38 229 true PREHOOK: query: drop table if exists src_dp diff --git ql/src/test/results/clientpositive/perf/query45.q.out ql/src/test/results/clientpositive/perf/query45.q.out index 04f9b02..c11cd2d 100644 --- ql/src/test/results/clientpositive/perf/query45.q.out +++ ql/src/test/results/clientpositive/perf/query45.q.out @@ -25,15 +25,15 @@ Stage-0 Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_42] - Select Operator [SEL_41] (rows=95833781 width=135) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_40] (rows=95833781 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col0, _col1 - Group By Operator [GBY_38] (rows=191667562 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col10)"],keys:_col3, _col4 + Group By Operator [GBY_40] (rows=95833781 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0, _col1 + Group By Operator [GBY_38] (rows=191667562 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col10)"],keys:_col4, _col3 + Select Operator [SEL_37] (rows=191667562 width=135) + Output:["_col4","_col3","_col10"] Merge Join Operator [MERGEJOIN_72] (rows=191667562 width=135) Conds:RS_34._col0=RS_35._col4(Inner),Output:["_col3","_col4","_col10"] <-Reducer 2 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index cb3d9a4..bd6e5ab 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -94,14 +94,14 @@ STAGE PLANS: Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) - keys: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ - Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized @@ -109,12 +109,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) - keys: KEY._col0 (type: timestamp), KEY._col1 (type: float), KEY._col2 (type: string), KEY._col3 (type: boolean), KEY._col4 (type: double) + keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col4)) (type: double) + expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/tez/explainuser_1.q.out ql/src/test/results/clientpositive/tez/explainuser_1.q.out index 1871c7e..be4b1a0 100644 --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -651,70 +651,68 @@ Stage-0 Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_35] - Select Operator [SEL_34] (rows=1 width=20) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_33] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0, _col1 - Group By Operator [GBY_31] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col2, _col6 - Select Operator [SEL_30] (rows=1 width=20) - Output:["_col2","_col6"] - Filter Operator [FIL_29] (rows=1 width=20) - predicate:(((_col1 + _col4) >= 0) and ((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0)) - Merge Join Operator [MERGEJOIN_42] (rows=4 width=20) - Conds:RS_25._col0=RS_26._col0(Outer),RS_25._col0=RS_27._col0(Right Outer),Output:["_col1","_col2","_col4","_col6"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col0 - Select Operator [SEL_24] (rows=20 width=80) - Output:["_col0","_col1"] - Filter Operator [FIL_41] (rows=20 width=80) - predicate:(c_int > 0) - TableScan [TS_22] (rows=20 width=80) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_8] - Select Operator [SEL_6] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_5] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_4] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0)) - TableScan [TS_0] (rows=20 width=83) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Select Operator [SEL_20] (rows=1 width=89) - Output:["_col0","_col1"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_19] - Select Operator [SEL_17] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_16] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_40] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0)) - TableScan [TS_11] (rows=20 width=83) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Group By Operator [GBY_33] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col0, _col1 + Group By Operator [GBY_31] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col6, _col2 + Select Operator [SEL_30] (rows=1 width=20) + Output:["_col6","_col2"] + Filter Operator [FIL_29] (rows=1 width=20) + predicate:(((_col1 + _col4) >= 0) and ((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0)) + Merge Join Operator [MERGEJOIN_42] (rows=4 width=20) + Conds:RS_25._col0=RS_26._col0(Outer),RS_25._col0=RS_27._col0(Right Outer),Output:["_col1","_col2","_col4","_col6"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_24] (rows=20 width=80) + Output:["_col0","_col1"] + Filter Operator [FIL_41] (rows=20 width=80) + predicate:(c_int > 0) + TableScan [TS_22] (rows=20 width=80) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_8] + Select Operator [SEL_6] (rows=1 width=105) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_5] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_3] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_39] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0)) + TableScan [TS_0] (rows=20 width=83) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=1 width=89) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_19] + Select Operator [SEL_17] (rows=1 width=105) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_16] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_14] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_40] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0)) + TableScan [TS_11] (rows=20 width=83) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: type: QUERY @@ -1348,28 +1346,26 @@ Stage-0 Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_12] - Select Operator [SEL_11] (rows=5 width=20) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_10] (rows=5 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_9] - PartitionCols:_col0, _col1 - Group By Operator [GBY_8] (rows=5 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=10 width=91) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=10 width=91) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_3] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_2] (rows=10 width=91) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Select Operator [SEL_1] (rows=20 width=83) - Output:["key","c_int","c_float"] - TableScan [TS_0] (rows=20 width=83) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Group By Operator [GBY_10] (rows=5 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=5 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 + Select Operator [SEL_5] (rows=10 width=91) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=10 width=91) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_3] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_2] (rows=10 width=91) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Select Operator [SEL_1] (rows=20 width=83) + Output:["key","c_int","c_float"] + TableScan [TS_0] (rows=20 width=83) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select key from(select key from (select key from cbo_t1 limit 5)cbo_t2 limit 5)cbo_t3 limit 5 PREHOOK: type: QUERY @@ -1676,72 +1672,74 @@ Stage-0 Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_39] - Group By Operator [GBY_37] (rows=1 width=101) - Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=1 width=101) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 - Merge Join Operator [MERGEJOIN_51] (rows=1 width=93) - Conds:RS_30._col0=RS_31._col0(Left Semi),RS_30._col0=RS_32._col0(Left Semi),Output:["_col0","_col1"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0 - Group By Operator [GBY_29] (rows=3 width=56) - Output:["_col0"],keys:_col0 - Select Operator [SEL_25] (rows=6 width=70) - Output:["_col0"] - Filter Operator [FIL_50] (rows=6 width=70) - predicate:(UDFToDouble(key) > 0.0) - TableScan [TS_23] (rows=20 width=76) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0 - Select Operator [SEL_10] (rows=1 width=93) - Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_9] - Select Operator [SEL_8] (rows=1 width=101) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_7] (rows=1 width=101) - predicate:(((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1)) and ((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0)) - Select Operator [SEL_6] (rows=1 width=101) - Output:["_col1","_col2","_col3"] - Group By Operator [GBY_5] (rows=1 width=101) + Select Operator [SEL_38] (rows=1 width=101) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_37] (rows=1 width=101) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=1 width=101) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 + Merge Join Operator [MERGEJOIN_51] (rows=1 width=93) + Conds:RS_30._col0=RS_31._col0(Left Semi),RS_30._col0=RS_32._col0(Left Semi),Output:["_col0","_col1"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col0 + Group By Operator [GBY_29] (rows=3 width=56) + Output:["_col0"],keys:_col0 + Select Operator [SEL_25] (rows=6 width=70) + Output:["_col0"] + Filter Operator [FIL_50] (rows=6 width=70) + predicate:(UDFToDouble(key) > 0.0) + TableScan [TS_23] (rows=20 width=76) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_10] (rows=1 width=93) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + Select Operator [SEL_8] (rows=1 width=101) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_7] (rows=1 width=101) + predicate:(((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1)) and ((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0)) + Select Operator [SEL_6] (rows=1 width=101) + Output:["_col1","_col2","_col3"] + Group By Operator [GBY_5] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_3] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_48] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and (((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0)) and (UDFToDouble(key) > 0.0)) + TableScan [TS_0] (rows=20 width=83) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col0 + Group By Operator [GBY_27] (rows=1 width=85) + Output:["_col0"],keys:_col0 + Select Operator [SEL_21] (rows=1 width=85) + Output:["_col0"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_20] + Select Operator [SEL_18] (rows=1 width=93) + Output:["_col0","_col1"] + Group By Operator [GBY_17] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_4] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=1 width=101) + Group By Operator [GBY_15] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_48] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and (((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0)) and (UDFToDouble(key) > 0.0)) - TableScan [TS_0] (rows=20 width=83) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col0 - Group By Operator [GBY_27] (rows=1 width=85) - Output:["_col0"],keys:_col0 - Select Operator [SEL_21] (rows=1 width=85) - Output:["_col0"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_20] - Select Operator [SEL_18] (rows=1 width=93) - Output:["_col0","_col1"] - Group By Operator [GBY_17] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_15] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_49] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and (UDFToDouble(key) > 0.0)) - TableScan [TS_12] (rows=20 width=83) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Filter Operator [FIL_49] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and (UDFToDouble(key) > 0.0)) + TableScan [TS_12] (rows=20 width=83) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/explainuser_2.q.out ql/src/test/results/clientpositive/tez/explainuser_2.q.out index 5530660..370bd37 100644 --- ql/src/test/results/clientpositive/tez/explainuser_2.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_2.q.out @@ -308,123 +308,121 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_52] - Select Operator [SEL_51] (rows=805 width=10) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_50] (rows=805 width=10) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=1610 width=10) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col2, _col12, _col20 - Select Operator [SEL_47] (rows=1610 width=10) - Output:["_col2","_col12","_col20","_col13","_col21","_col3"] - Merge Join Operator [MERGEJOIN_97] (rows=1610 width=10) - Conds:RS_44._col1, _col3=RS_45._col15, _col17(Inner),Output:["_col2","_col3","_col12","_col13","_col20","_col21"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col15, _col17 - Select Operator [SEL_40] (rows=1464 width=10) - Output:["_col14","_col15","_col17","_col6","_col7"] - Merge Join Operator [MERGEJOIN_96] (rows=1464 width=10) - Conds:RS_37._col4, _col6=RS_38._col2, _col4(Inner),Output:["_col2","_col3","_col14","_col15","_col17"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col4, _col6 - Merge Join Operator [MERGEJOIN_94] (rows=1331 width=10) - Conds:RS_34._col3=RS_35._col1(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col1 - Select Operator [SEL_17] (rows=12 width=7) - Output:["_col1"] - Filter Operator [FIL_88] (rows=12 width=7) - predicate:((key = 'src1key') and value is not null) - TableScan [TS_15] (rows=25 width=7) - default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_93] (rows=1210 width=10) - Conds:RS_31._col2=RS_32._col0(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_87] (rows=250 width=10) - predicate:((value = 'd1value') and key is not null) - TableScan [TS_12] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_92] (rows=1100 width=10) - Conds:RS_28._col1=RS_29._col3(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col3 - Select Operator [SEL_11] (rows=42 width=34) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_86] (rows=42 width=34) - predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) - TableScan [TS_9] (rows=85 width=34) - default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=1000 width=10) - Output:["_col1"] - Filter Operator [FIL_85] (rows=1000 width=10) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_6] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col2, _col4 - Merge Join Operator [MERGEJOIN_95] (rows=275 width=10) - Conds:RS_24._col0=RS_25._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col0 - Select Operator [SEL_20] (rows=42 width=34) - Output:["_col0","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_89] (rows=42 width=34) - predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_18] (rows=85 width=34) - default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_90] (rows=250 width=10) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_21] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_91] (rows=275 width=10) - Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=170 width=34) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_83] (rows=170 width=34) - predicate:(v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_0] (rows=170 width=34) - default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] - <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_84] (rows=250 width=10) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_3] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Group By Operator [GBY_50] (rows=805 width=10) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_48] (rows=1610 width=10) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col12, _col20, _col2 + Select Operator [SEL_47] (rows=1610 width=10) + Output:["_col12","_col20","_col2","_col13","_col21","_col3"] + Merge Join Operator [MERGEJOIN_97] (rows=1610 width=10) + Conds:RS_44._col1, _col3=RS_45._col15, _col17(Inner),Output:["_col2","_col3","_col12","_col13","_col20","_col21"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col15, _col17 + Select Operator [SEL_40] (rows=1464 width=10) + Output:["_col14","_col15","_col17","_col6","_col7"] + Merge Join Operator [MERGEJOIN_96] (rows=1464 width=10) + Conds:RS_37._col4, _col6=RS_38._col2, _col4(Inner),Output:["_col2","_col3","_col14","_col15","_col17"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col4, _col6 + Merge Join Operator [MERGEJOIN_94] (rows=1331 width=10) + Conds:RS_34._col3=RS_35._col1(Inner),Output:["_col2","_col3","_col4","_col6"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Select Operator [SEL_17] (rows=12 width=7) + Output:["_col1"] + Filter Operator [FIL_88] (rows=12 width=7) + predicate:((key = 'src1key') and value is not null) + TableScan [TS_15] (rows=25 width=7) + default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_93] (rows=1210 width=10) + Conds:RS_31._col2=RS_32._col0(Inner),Output:["_col2","_col3","_col4","_col6"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=250 width=10) + Output:["_col0"] + Filter Operator [FIL_87] (rows=250 width=10) + predicate:((value = 'd1value') and key is not null) + TableScan [TS_12] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_92] (rows=1100 width=10) + Conds:RS_28._col1=RS_29._col3(Inner),Output:["_col2","_col3","_col4","_col6"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col3 + Select Operator [SEL_11] (rows=42 width=34) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_86] (rows=42 width=34) + predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) + TableScan [TS_9] (rows=85 width=34) + default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=1000 width=10) + Output:["_col1"] + Filter Operator [FIL_85] (rows=1000 width=10) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_6] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col2, _col4 + Merge Join Operator [MERGEJOIN_95] (rows=275 width=10) + Conds:RS_24._col0=RS_25._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=42 width=34) + Output:["_col0","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_89] (rows=42 width=34) + predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_18] (rows=85 width=34) + default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=250 width=10) + Output:["_col0"] + Filter Operator [FIL_90] (rows=250 width=10) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_21] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_91] (rows=275 width=10) + Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=170 width=34) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_83] (rows=170 width=34) + predicate:(v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_0] (rows=170 width=34) + default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] + <-Map 6 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=250 width=10) + Output:["_col0"] + Filter Operator [FIL_84] (rows=250 width=10) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_3] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain SELECT x.key, z.value, y.value @@ -1036,102 +1034,100 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_52] - Select Operator [SEL_51] (rows=805 width=10) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_50] (rows=805 width=10) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 3 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=1610 width=10) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col2, _col12, _col20 - Select Operator [SEL_47] (rows=1610 width=10) - Output:["_col2","_col12","_col20","_col13","_col21","_col3"] - Map Join Operator [MAPJOIN_97] (rows=1610 width=10) - Conds:RS_44._col1, _col3=SEL_40._col15, _col17(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col12","_col13","_col20","_col21"] - <-Map 2 [BROADCAST_EDGE] - BROADCAST [RS_44] - PartitionCols:_col1, _col3 - Map Join Operator [MAPJOIN_91] (rows=275 width=10) - Conds:RS_41._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"] - <-Map 1 [BROADCAST_EDGE] - BROADCAST [RS_41] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=170 width=34) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_83] (rows=170 width=34) - predicate:(v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_0] (rows=170 width=34) - default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] - <-Select Operator [SEL_5] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_84] (rows=250 width=10) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_3] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_40] (rows=1464 width=10) - Output:["_col14","_col15","_col17","_col6","_col7"] - Map Join Operator [MAPJOIN_96] (rows=1464 width=10) - Conds:MAPJOIN_94._col4, _col6=RS_38._col2, _col4(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col14","_col15","_col17"] - <-Map 10 [BROADCAST_EDGE] - BROADCAST [RS_38] - PartitionCols:_col2, _col4 - Map Join Operator [MAPJOIN_95] (rows=275 width=10) - Conds:RS_24._col0=SEL_23._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col5"] - <-Map 9 [BROADCAST_EDGE] - BROADCAST [RS_24] - PartitionCols:_col0 - Select Operator [SEL_20] (rows=42 width=34) - Output:["_col0","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_89] (rows=42 width=34) - predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_18] (rows=85 width=34) - default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_23] (rows=250 width=10) + Group By Operator [GBY_50] (rows=805 width=10) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 3 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_48] (rows=1610 width=10) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col12, _col20, _col2 + Select Operator [SEL_47] (rows=1610 width=10) + Output:["_col12","_col20","_col2","_col13","_col21","_col3"] + Map Join Operator [MAPJOIN_97] (rows=1610 width=10) + Conds:RS_44._col1, _col3=SEL_40._col15, _col17(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col12","_col13","_col20","_col21"] + <-Map 2 [BROADCAST_EDGE] + BROADCAST [RS_44] + PartitionCols:_col1, _col3 + Map Join Operator [MAPJOIN_91] (rows=275 width=10) + Conds:RS_41._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"] + <-Map 1 [BROADCAST_EDGE] + BROADCAST [RS_41] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=170 width=34) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_83] (rows=170 width=34) + predicate:(v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_0] (rows=170 width=34) + default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] + <-Select Operator [SEL_5] (rows=250 width=10) + Output:["_col0"] + Filter Operator [FIL_84] (rows=250 width=10) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_3] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_40] (rows=1464 width=10) + Output:["_col14","_col15","_col17","_col6","_col7"] + Map Join Operator [MAPJOIN_96] (rows=1464 width=10) + Conds:MAPJOIN_94._col4, _col6=RS_38._col2, _col4(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col14","_col15","_col17"] + <-Map 10 [BROADCAST_EDGE] + BROADCAST [RS_38] + PartitionCols:_col2, _col4 + Map Join Operator [MAPJOIN_95] (rows=275 width=10) + Conds:RS_24._col0=SEL_23._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col5"] + <-Map 9 [BROADCAST_EDGE] + BROADCAST [RS_24] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=42 width=34) + Output:["_col0","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_89] (rows=42 width=34) + predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_18] (rows=85 width=34) + default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Select Operator [SEL_23] (rows=250 width=10) + Output:["_col0"] + Filter Operator [FIL_90] (rows=250 width=10) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_21] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_94] (rows=1331 width=10) + Conds:MAPJOIN_93._col3=RS_35._col1(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"] + <-Map 8 [BROADCAST_EDGE] + BROADCAST [RS_35] + PartitionCols:_col1 + Select Operator [SEL_17] (rows=12 width=7) + Output:["_col1"] + Filter Operator [FIL_88] (rows=12 width=7) + predicate:((key = 'src1key') and value is not null) + TableScan [TS_15] (rows=25 width=7) + default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_93] (rows=1210 width=10) + Conds:MAPJOIN_92._col2=RS_32._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"] + <-Map 7 [BROADCAST_EDGE] + BROADCAST [RS_32] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=250 width=10) Output:["_col0"] - Filter Operator [FIL_90] (rows=250 width=10) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_21] (rows=500 width=10) + Filter Operator [FIL_87] (rows=250 width=10) + predicate:((value = 'd1value') and key is not null) + TableScan [TS_12] (rows=500 width=10) default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_94] (rows=1331 width=10) - Conds:MAPJOIN_93._col3=RS_35._col1(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"] - <-Map 8 [BROADCAST_EDGE] - BROADCAST [RS_35] - PartitionCols:_col1 - Select Operator [SEL_17] (rows=12 width=7) + <-Map Join Operator [MAPJOIN_92] (rows=1100 width=10) + Conds:SEL_8._col1=RS_29._col3(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"] + <-Map 6 [BROADCAST_EDGE] + BROADCAST [RS_29] + PartitionCols:_col3 + Select Operator [SEL_11] (rows=42 width=34) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_86] (rows=42 width=34) + predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) + TableScan [TS_9] (rows=85 width=34) + default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Select Operator [SEL_8] (rows=1000 width=10) Output:["_col1"] - Filter Operator [FIL_88] (rows=12 width=7) - predicate:((key = 'src1key') and value is not null) - TableScan [TS_15] (rows=25 width=7) - default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_93] (rows=1210 width=10) - Conds:MAPJOIN_92._col2=RS_32._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"] - <-Map 7 [BROADCAST_EDGE] - BROADCAST [RS_32] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_87] (rows=250 width=10) - predicate:((value = 'd1value') and key is not null) - TableScan [TS_12] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_92] (rows=1100 width=10) - Conds:SEL_8._col1=RS_29._col3(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"] - <-Map 6 [BROADCAST_EDGE] - BROADCAST [RS_29] - PartitionCols:_col3 - Select Operator [SEL_11] (rows=42 width=34) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_86] (rows=42 width=34) - predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) - TableScan [TS_9] (rows=85 width=34) - default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_8] (rows=1000 width=10) - Output:["_col1"] - Filter Operator [FIL_85] (rows=1000 width=10) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_6] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Filter Operator [FIL_85] (rows=1000 width=10) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_6] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain SELECT x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out index 7f00b06..875bdde 100644 --- ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out @@ -790,6 +790,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -825,22 +826,33 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) - keys: _col0 (type: int), _col1 (type: int) - mode: complete + keys: _col1 (type: int), _col0 (type: int) + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reducer 3 Execution mode: vectorized Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 4 + Execution mode: vectorized + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/tez/vectorization_14.q.out ql/src/test/results/clientpositive/tez/vectorization_14.q.out index 2a59833..8b9d98f 100644 --- ql/src/test/results/clientpositive/tez/vectorization_14.q.out +++ ql/src/test/results/clientpositive/tez/vectorization_14.q.out @@ -95,14 +95,14 @@ STAGE PLANS: Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) - keys: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ - Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized @@ -110,12 +110,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) - keys: KEY._col0 (type: timestamp), KEY._col1 (type: float), KEY._col2 (type: string), KEY._col3 (type: boolean), KEY._col4 (type: double) + keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col4)) (type: double) + expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index bc23b36..760129d 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -785,7 +785,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -820,20 +821,16 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) - keys: _col0 (type: int), _col1 (type: int) - mode: complete + keys: _col1 (type: int), _col0 (type: int) + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -842,6 +839,30 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out index 6d4f13a..d809808 100644 --- ql/src/test/results/clientpositive/vectorization_14.q.out +++ ql/src/test/results/clientpositive/vectorization_14.q.out @@ -89,26 +89,26 @@ STAGE PLANS: Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) - keys: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ - Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) - keys: KEY._col0 (type: timestamp), KEY._col1 (type: float), KEY._col2 (type: string), KEY._col3 (type: boolean), KEY._col4 (type: double) + keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col4)) (type: double) + expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE File Output Operator