diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java index 762f734..daf9698 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java @@ -288,9 +288,6 @@ private void visit(SparkTran child, Set seen, List result) * @param child */ public void connect(SparkTran parent, SparkTran child) { - if (getChildren(parent).contains(child)) { - throw new IllegalStateException("Connection already exists"); - } rootTrans.remove(child); leafTrans.remove(parent); if (transGraph.get(parent) == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/OperatorComparatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/OperatorComparatorFactory.java index 3518823..da4d190 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/OperatorComparatorFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/OperatorComparatorFactory.java @@ -92,11 +92,11 @@ comparatorMapping.put(VectorSparkHashTableSinkOperator.class, new SparkHashTableSinkOperatorComparator()); comparatorMapping.put(LateralViewJoinOperator.class, new LateralViewJoinOperatorComparator()); - comparatorMapping.put(VectorGroupByOperator.class, new GroupByOperatorComparator()); + comparatorMapping.put(VectorGroupByOperator.class, new VectorGroupByOperatorComparator()); comparatorMapping.put(CommonMergeJoinOperator.class, new MapJoinOperatorComparator()); comparatorMapping.put(VectorFilterOperator.class, new FilterOperatorComparator()); comparatorMapping.put(UDTFOperator.class, new UDTFOperatorComparator()); - comparatorMapping.put(VectorSelectOperator.class, new SelectOperatorComparator()); + comparatorMapping.put(VectorSelectOperator.class, new VectorSelectOperatorComparator()); comparatorMapping.put(VectorLimitOperator.class, new LimitOperatorComparator()); comparatorMapping.put(ScriptOperator.class, new ScriptOperatorComparator()); comparatorMapping.put(TemporaryHashSinkOperator.class, new HashTableSinkOperatorComparator()); @@ -196,6 +196,25 @@ public boolean equals(SelectOperator op1, SelectOperator op2) { } } + static class VectorSelectOperatorComparator implements OperatorComparator { + + @Override + public boolean equals(VectorSelectOperator op1, VectorSelectOperator op2) { + Preconditions.checkNotNull(op1); + Preconditions.checkNotNull(op2); + SelectDesc op1Conf = op1.getConf(); + SelectDesc op2Conf = op2.getConf(); + + if (compareString(op1Conf.getColListString(), op2Conf.getColListString()) && + compareObject(op1Conf.getOutputColumnNames(), op2Conf.getOutputColumnNames()) && + compareString(op1Conf.explainNoCompute(), op2Conf.explainNoCompute())) { + return true; + } else { + return false; + } + } + } + static class FilterOperatorComparator implements OperatorComparator { @Override @@ -237,6 +256,29 @@ public boolean equals(GroupByOperator op1, GroupByOperator op2) { } } + static class VectorGroupByOperatorComparator implements OperatorComparator { + + @Override + public boolean equals(VectorGroupByOperator op1, VectorGroupByOperator op2) { + Preconditions.checkNotNull(op1); + Preconditions.checkNotNull(op2); + GroupByDesc op1Conf = op1.getConf(); + GroupByDesc op2Conf = op2.getConf(); + + if (compareString(op1Conf.getModeString(), op2Conf.getModeString()) && + compareString(op1Conf.getKeyString(), op2Conf.getKeyString()) && + compareObject(op1Conf.getOutputColumnNames(), op2Conf.getOutputColumnNames()) && + op1Conf.pruneGroupingSetId() == op2Conf.pruneGroupingSetId() && + compareObject(op1Conf.getAggregatorStrings(), op2Conf.getAggregatorStrings()) && + op1Conf.getBucketGroup() == op2Conf.getBucketGroup()) { + return true; + } else { + return false; + } + } + } + + static class ReduceSinkOperatorComparator implements OperatorComparator { @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/CombineEquivalentWorkResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/CombineEquivalentWorkResolver.java index b7c57e8..9c4c25e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/CombineEquivalentWorkResolver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/CombineEquivalentWorkResolver.java @@ -210,16 +210,6 @@ private boolean compareWork(BaseWork first, BaseWork second, SparkWork sparkWork return false; } - // If these two Works share the same child, we can not combine them as SparkPlan does not - // support multi edge between two Works. - List firstChildren = sparkWork.getChildren(first); - List secondChildren = sparkWork.getChildren(second); - for (BaseWork child : firstChildren) { - if (secondChildren.contains(child)) { - return false; - } - } - Set> firstRootOperators = first.getAllRootOperators(); Set> secondRootOperators = second.getAllRootOperators(); if (firstRootOperators.size() != secondRootOperators.size()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java index 37012b4..7bda4ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java @@ -220,6 +220,10 @@ public void setReversedExprs(Map reversedExprs) { */ @Explain(displayName = "keys", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getKeysString() { + if (joinKeys == null) { + return null; + } + Map keyMap = new LinkedHashMap(); for (byte i = 0; i < joinKeys.length; i++) { keyMap.put(i, PlanUtils.getExprListString(Arrays.asList(joinKeys[i]))); diff --git a/ql/src/test/queries/clientpositive/dynamic_rdd_cache.q b/ql/src/test/queries/clientpositive/dynamic_rdd_cache.q index a380b15..2f1582a 100644 --- a/ql/src/test/queries/clientpositive/dynamic_rdd_cache.q +++ b/ql/src/test/queries/clientpositive/dynamic_rdd_cache.q @@ -97,6 +97,13 @@ ORDER BY inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov ; +EXPLAIN +WITH test AS +(SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory + UNION ALL + SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory) +SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM test SORT BY inv_quantity_on_hand; + DROP TABLE inv; DROP TABLE inventory; DROP TABLE item; diff --git a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out index bc716a0..69fe396 100644 --- a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out @@ -1309,6 +1309,75 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: EXPLAIN +WITH test AS +(SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory + UNION ALL + SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory) +SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM test SORT BY inv_quantity_on_hand +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +WITH test AS +(SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory + UNION ALL + SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory) +SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM test SORT BY inv_quantity_on_hand +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + TableScan + alias: inventory + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: DROP TABLE inv PREHOOK: type: DROPTABLE PREHOOK: Input: default@inv diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out index 90085a8..ee9f448 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -97,27 +97,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out index 505cc59..61563e4 100644 --- a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out @@ -964,6 +964,65 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: EXPLAIN +WITH test AS +(SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory + UNION ALL + SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory) +SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM test SORT BY inv_quantity_on_hand +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +WITH test AS +(SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory + UNION ALL + SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory) +SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM test SORT BY inv_quantity_on_hand +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 31), Map 1 (PARTITION-LEVEL SORT, 31) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: inventory + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: DROP TABLE inv PREHOOK: type: DROPTABLE PREHOOK: Input: default@inv diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out index 155515d..8d92911 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out @@ -64,7 +64,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -85,24 +85,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/spark/union15.q.out b/ql/src/test/results/clientpositive/spark/union15.q.out index 6be13c9..cb8bc75 100644 --- a/ql/src/test/results/clientpositive/spark/union15.q.out +++ b/ql/src/test/results/clientpositive/spark/union15.q.out @@ -27,7 +27,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 4 (GROUP, 2), Map 5 (GROUP, 2), Reducer 2 (GROUP, 2) + Reducer 3 <- Map 4 (GROUP, 2), Map 4 (GROUP, 2), Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -67,27 +67,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: bigint) - Map 5 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/union16.q.out b/ql/src/test/results/clientpositive/spark/union16.q.out index 5e2c77b..39ba7b9 100644 --- a/ql/src/test/results/clientpositive/spark/union16.q.out +++ b/ql/src/test/results/clientpositive/spark/union16.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1), Map 10 (GROUP, 1), Map 11 (GROUP, 1), Map 12 (GROUP, 1), Map 13 (GROUP, 1), Map 14 (GROUP, 1), Map 15 (GROUP, 1), Map 16 (GROUP, 1), Map 17 (GROUP, 1), Map 18 (GROUP, 1), Map 19 (GROUP, 1), Map 20 (GROUP, 1), Map 21 (GROUP, 1), Map 22 (GROUP, 1), Map 23 (GROUP, 1), Map 24 (GROUP, 1), Map 25 (GROUP, 1), Map 26 (GROUP, 1), Map 3 (GROUP, 1), Map 4 (GROUP, 1), Map 5 (GROUP, 1), Map 6 (GROUP, 1), Map 7 (GROUP, 1), Map 8 (GROUP, 1), Map 9 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -93,438 +93,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 10 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 11 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 12 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 13 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 14 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 15 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 16 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 17 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 18 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 19 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 20 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 21 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 22 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 23 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 24 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 25 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 26 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 6 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 7 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 8 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 9 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/union2.q.out b/ql/src/test/results/clientpositive/spark/union2.q.out index e4afb1b..3c5b075 100644 --- a/ql/src/test/results/clientpositive/spark/union2.q.out +++ b/ql/src/test/results/clientpositive/spark/union2.q.out @@ -20,7 +20,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1), Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -41,24 +41,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/union25.q.out b/ql/src/test/results/clientpositive/spark/union25.q.out index 5193c06..a8b89d6 100644 --- a/ql/src/test/results/clientpositive/spark/union25.q.out +++ b/ql/src/test/results/clientpositive/spark/union25.q.out @@ -66,7 +66,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 2), Map 5 (GROUP, 2) + Reducer 4 <- Map 3 (GROUP, 2), Map 3 (GROUP, 2) Reducer 2 <- Map 1 (GROUP, 2), Reducer 4 (GROUP, 2) #### A masked pattern was here #### Vertices: @@ -110,25 +110,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/union9.q.out b/ql/src/test/results/clientpositive/spark/union9.q.out index d420ef1..92499c0 100644 --- a/ql/src/test/results/clientpositive/spark/union9.q.out +++ b/ql/src/test/results/clientpositive/spark/union9.q.out @@ -22,7 +22,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1), Map 3 (GROUP, 1), Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,42 +43,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator