diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java new file mode 100644 index 0000000..f73affc --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.rules.JoinCommuteRule; +import org.apache.calcite.util.Permutation; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; + +/** + * Planner rule that permutes the inputs of a Join, if it has a Project on top + * that simply swaps the fields of both inputs. + */ +public class HiveJoinCommuteRule extends RelOptRule { + + public static final HiveJoinCommuteRule INSTANCE = new HiveJoinCommuteRule( + HiveProject.class, HiveJoin.class); + + + public HiveJoinCommuteRule(Class projClazz, + Class joinClazz) { + super(operand(projClazz, + operand(joinClazz, any()))); + } + + public void onMatch(final RelOptRuleCall call) { + Project topProject = call.rel(0); + Join join = call.rel(1); + + // 1. We check if it is a permutation project. If it is + // not, or this is the identity, the rule will do nothing + final Permutation topPermutation = topProject.getPermutation(); + if (topPermutation == null) { + return; + } + if (topPermutation.isIdentity()) { + return; + } + + // 2. We swap the join + final RelNode swapped = JoinCommuteRule.swap(join,true); + if (swapped == null) { + return; + } + + // 3. The result should have a project on top, otherwise we + // bail out. + if (swapped instanceof Join) { + return; + } + + // 4. We check if it is a permutation project. If it is + // not, or this is the identity, the rule will do nothing + final Project bottomProject = (Project) swapped; + final Permutation bottomPermutation = bottomProject.getPermutation(); + if (bottomPermutation == null) { + return; + } + if (bottomPermutation.isIdentity()) { + return; + } + + // 5. If the product of the topPermutation and bottomPermutation yields + // the identity, then we can swap the join and remove the project on + // top. + final Permutation product = topPermutation.product(bottomPermutation); + if (!product.isIdentity()) { + return; + } + + // 6. Return the new join as a replacement + final Join swappedJoin = (Join) bottomProject.getInput(0); + call.transformTo(swappedJoin); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 7fd8c85..3b5dbe2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -65,7 +65,6 @@ import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; import org.apache.calcite.rel.rules.FilterProjectTransposeRule; -import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule; import org.apache.calcite.rel.rules.JoinToMultiJoinRule; import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; import org.apache.calcite.rel.rules.ProjectMergeRule; @@ -139,6 +138,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; @@ -862,7 +862,16 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu calciteOptimizedPlan = hepPlanner.findBestExp(); - // 4. Run rule to fix windowing issue when it is done over + // 4. Run rule to try to remove projects on top of join operators + hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); + hepPgmBldr.addRuleInstance(HiveJoinCommuteRule.INSTANCE); + hepPlanner = new HepPlanner(hepPgmBldr.build()); + hepPlanner.registerMetadataProviders(list); + cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); + hepPlanner.setRoot(calciteOptimizedPlan); + calciteOptimizedPlan = hepPlanner.findBestExp(); + + // 5. Run rule to fix windowing issue when it is done over // aggregation columns (HIVE-10627) hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); hepPgmBldr.addRuleInstance(HiveWindowingFixRule.INSTANCE); @@ -872,9 +881,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu hepPlanner.setRoot(calciteOptimizedPlan); calciteOptimizedPlan = hepPlanner.findBestExp(); - // 5. Run rules to aid in translation from Calcite tree to Hive tree + // 6. Run rules to aid in translation from Calcite tree to Hive tree if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { - // 5.1. Merge join into multijoin operators (if possible) + // 6.1. Merge join into multijoin operators (if possible) hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); hepPgmBldr.addRuleInstance(HiveJoinToMultiJoinRule.INSTANCE); hepPgmBldr = hepPgmBldr.addRuleCollection(ImmutableList.of( @@ -894,7 +903,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); calciteOptimizedPlan = fieldTrimmer.trim(calciteOptimizedPlan); - // 5.2. Introduce exchange operators below join/multijoin operators + // 6.2. Introduce exchange operators below join/multijoin operators hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); hepPgmBldr.addRuleInstance(HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN); hepPgmBldr.addRuleInstance(HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN); diff --git a/ql/src/test/results/clientpositive/auto_join12.q.out b/ql/src/test/results/clientpositive/auto_join12.q.out index e97d7e6..7d8db0a 100644 --- a/ql/src/test/results/clientpositive/auto_join12.q.out +++ b/ql/src/test/results/clientpositive/auto_join12.q.out @@ -32,7 +32,7 @@ STAGE PLANS: $hdt$_0:$hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -51,7 +51,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -59,8 +59,8 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -77,8 +77,8 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -86,33 +86,29 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col3 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col1,_col3) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col1,_col3) (type: int) + Group By Operator + aggregations: sum(_col0) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/auto_join5.q.out b/ql/src/test/results/clientpositive/auto_join5.q.out index 69b7aab..3209d07 100644 --- a/ql/src/test/results/clientpositive/auto_join5.q.out +++ b/ql/src/test/results/clientpositive/auto_join5.q.out @@ -46,11 +46,11 @@ STAGE PLANS: Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:src1 + $hdt$_0:$hdt$_0:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:src1 + $hdt$_0:$hdt$_0:src1 TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -81,14 +81,14 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out index 40d2dd4..b52b475 100644 --- a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out +++ b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out @@ -80,20 +80,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string), _col1 (type: string) - auto parallelism: false - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -108,9 +94,23 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 + tag: 0 value expressions: _col1 (type: string) auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -203,8 +203,8 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src [$hdt$_1:a] - /src1 [$hdt$_0:b] + /src [$hdt$_0:a] + /src1 [$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -213,10 +213,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: '429' (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) + expressions: '429' (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out index d5f45da..43ba27d 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out @@ -23,14 +23,14 @@ JOIN src yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -85,7 +85,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -109,10 +109,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan alias: yy Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -127,13 +134,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -143,18 +143,14 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -259,20 +255,20 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: yy - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -289,44 +285,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: yy + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 277 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Join Operator condition map: Inner Join 0 to 1 @@ -353,17 +328,30 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 277 Data size: 2854 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/correlationoptimizer6.q.out b/ql/src/test/results/clientpositive/correlationoptimizer6.q.out index 85e447c..6b003d5 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer6.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer6.q.out @@ -38,34 +38,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -74,13 +74,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -96,7 +96,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -104,7 +104,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -120,14 +120,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -138,51 +138,47 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce Map Operator Tree: TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -191,13 +187,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -213,7 +209,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -221,7 +217,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -296,64 +292,64 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE @@ -383,17 +379,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Join Operator condition map: Inner Join 0 to 1 @@ -420,17 +412,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -510,14 +498,14 @@ STAGE PLANS: $hdt$_0:$hdt$_0:$hdt$_0:x TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -525,14 +513,14 @@ STAGE PLANS: $hdt$_1:$hdt$_1:$hdt$_1:x TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -543,14 +531,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -558,29 +546,29 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -588,18 +576,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work @@ -622,17 +610,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) @@ -649,17 +633,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -723,12 +703,12 @@ JOIN ON xx.key=yy.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -767,17 +747,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - TableScan alias: xx Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -792,6 +765,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -801,17 +781,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -875,6 +851,21 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: xx + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -896,24 +887,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - TableScan - alias: xx - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 394 Data size: 4127 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) @@ -930,31 +923,6 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Mux Operator - Statistics: Num rows: 394 Data size: 4127 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -1023,13 +991,13 @@ JOIN ON xx.key=yy.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1084,7 +1052,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1108,17 +1076,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - TableScan alias: xx Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1133,6 +1094,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -1142,17 +1110,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1216,20 +1180,20 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: xx + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1246,23 +1210,40 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: xx - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 514 Data size: 5411 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Join Operator condition map: Inner Join 0 to 1 @@ -1289,38 +1270,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Mux Operator - Statistics: Num rows: 514 Data size: 5411 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1382,13 +1338,13 @@ JOIN src yy ON xx.key=yy.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -1443,7 +1399,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1467,10 +1423,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan alias: yy Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1485,13 +1448,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -1501,17 +1457,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1597,20 +1549,20 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: yy - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1627,44 +1579,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: yy + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 277 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Join Operator condition map: Inner Join 0 to 1 @@ -1691,17 +1622,30 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 277 Data size: 2854 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -3267,35 +3211,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -3303,14 +3246,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) + keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -3323,18 +3266,18 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -3350,15 +3293,15 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: bigint) + Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -3368,16 +3311,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce @@ -3405,34 +3344,35 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -3440,14 +3380,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -3460,18 +3400,18 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -3537,13 +3477,13 @@ JOIN ON xx.key=yy.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -3599,7 +3539,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -3623,17 +3563,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: bigint) - TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -3663,30 +3596,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Demux Operator Statistics: Num rows: 163 Data size: 1653 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 164 Data size: 1653 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Join Operator condition map: Inner Join 0 to 1 @@ -3713,17 +3632,30 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 164 Data size: 1653 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -3784,20 +3716,20 @@ ON xx.key=yy.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 - Stage-9 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-10 + Stage-9 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:x + $hdt$_1:$hdt$_1:$hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:x + $hdt$_1:$hdt$_1:$hdt$_1:x TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -3813,7 +3745,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -3865,11 +3797,11 @@ STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:$hdt$_1:x + $hdt$_0:$hdt$_0:$hdt$_0:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:$hdt$_1:x + $hdt$_0:$hdt$_0:$hdt$_0:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -3885,7 +3817,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -3930,27 +3862,6 @@ STAGE PLANS: Reduce Operator Tree: Demux Operator Statistics: Num rows: 151 Data size: 1563 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 226 Data size: 2339 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) @@ -3967,17 +3878,30 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 226 Data size: 2339 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join12.q.out b/ql/src/test/results/clientpositive/join12.q.out index df340a8..bdb5496 100644 --- a/ql/src/test/results/clientpositive/join12.q.out +++ b/ql/src/test/results/clientpositive/join12.q.out @@ -40,15 +40,14 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -56,14 +55,15 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -71,18 +71,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce diff --git a/ql/src/test/results/clientpositive/join5.q.out b/ql/src/test/results/clientpositive/join5.q.out index f83ff73..0398655 100644 --- a/ql/src/test/results/clientpositive/join5.q.out +++ b/ql/src/test/results/clientpositive/join5.q.out @@ -53,45 +53,45 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out index b73643e..a8bd4df 100644 --- a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out +++ b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out @@ -21,54 +21,42 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE TableScan - alias: a + alias: b Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE TableScan - alias: a + alias: c Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 Inner Join 0 to 2 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - 2 _col0 (type: string), _col1 (type: string) + 0 key (type: string), hr (type: string) + 1 key (type: string), hr (type: string) + 2 key (type: string), hr (type: string) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() diff --git a/ql/src/test/results/clientpositive/join_merging.q.out b/ql/src/test/results/clientpositive/join_merging.q.out index a3afbec..b42bdc5 100644 --- a/ql/src/test/results/clientpositive/join_merging.q.out +++ b/ql/src/test/results/clientpositive/join_merging.q.out @@ -10,8 +10,7 @@ from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -20,19 +19,6 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_size > 10) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -46,36 +32,19 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int) + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE @@ -91,19 +60,21 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 + Right Outer Join1 to 2 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) + 2 _col0 (type: int) outputColumnNames: _col1, _col3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -137,19 +108,6 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_size > 10) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -163,28 +121,37 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 > (_col1 + 10)) (type: boolean) + predicate: (_col1 > (_col3 + 10)) (type: boolean) Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/join_nulls.q.out b/ql/src/test/results/clientpositive/join_nulls.q.out index 46e0170..b536985 100644 --- a/ql/src/test/results/clientpositive/join_nulls.q.out +++ b/ql/src/test/results/clientpositive/join_nulls.q.out @@ -54,7 +54,7 @@ POSTHOOK: Input: default@myinput1 NULL 35 100 100 NULL 35 48 NULL NULL 35 NULL 35 -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git a/ql/src/test/results/clientpositive/limit_pushdown.q.out b/ql/src/test/results/clientpositive/limit_pushdown.q.out index 6ace047..d77bc60 100644 --- a/ql/src/test/results/clientpositive/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/limit_pushdown.q.out @@ -833,10 +833,10 @@ on subq.key=subq2.key limit 4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -870,35 +870,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 - value expressions: _col0 (type: string), _col1 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -906,7 +882,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -914,14 +890,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -930,24 +906,20 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + Limit + Number of rows: 4 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 4 + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -978,11 +950,35 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git a/ql/src/test/results/clientpositive/lineage3.q.out b/ql/src/test/results/clientpositive/lineage3.q.out index 5c392fa..1afe92d 100644 --- a/ql/src/test/results/clientpositive/lineage3.q.out +++ b/ql/src/test/results/clientpositive/lineage3.q.out @@ -1424,46 +1424,55 @@ POSTHOOK: LINEAGE: { "edgeType": "PROJECTION" }, { - "sources": [], + "sources": [ + 2 + ], "targets": [ 0, 1 ], - "expression": "(1 = 1)", + "expression": "(UDFToDouble(a.key) > UDFToDouble(300))", "edgeType": "PREDICATE" }, { - "sources": [ - 4 + "sources": [], + "targets": [ + 0, + 1 ], + "expression": "sq_corr_0 is null", + "edgeType": "PREDICATE" + }, + { + "sources": [], "targets": [ 0, 1 ], - "expression": "(UDFToInteger(b.ctinyint) + 300) is null", + "expression": "(1 = 1)", "edgeType": "PREDICATE" }, { "sources": [ - 2 + 2, + 4 ], "targets": [ 0, 1 ], - "expression": "(UDFToDouble(a.key) > UDFToDouble(300))", + "expression": "(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))", "edgeType": "PREDICATE" }, { "sources": [ - 4, - 2 + 4 ], "targets": [ 0, 1 ], - "expression": "(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))", + "expression": "(UDFToInteger(b.ctinyint) + 300) is null", "edgeType": "PREDICATE" } ], diff --git a/ql/src/test/results/clientpositive/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/louter_join_ppr.q.out index c46792b..1b2b8e3 100644 --- a/ql/src/test/results/clientpositive/louter_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/louter_join_ppr.q.out @@ -113,42 +113,42 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -292,48 +292,44 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:$hdt$_0:b] + /src [$hdt$_0:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 15.0) and (UDFToDouble(_col0) < 25.0)) (type: boolean) + predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/optional_outer.q.out b/ql/src/test/results/clientpositive/optional_outer.q.out index 8616644..c1256f6 100644 --- a/ql/src/test/results/clientpositive/optional_outer.q.out +++ b/ql/src/test/results/clientpositive/optional_outer.q.out @@ -161,23 +161,19 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -226,23 +222,19 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out b/ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out index b6ca16f..bfe983a 100644 --- a/ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out +++ b/ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out @@ -115,36 +115,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col1 (type: string), _col2 (type: string) + value expressions: _col1 (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -378,55 +378,51 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:$hdt$_0:b] + /src [$hdt$_0:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] + /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:b] + /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Outer Join 0 to 1 filter mappings: - 0 [1, 1] + 1 [0, 1] filter predicates: - 0 {(VALUE._col1 = '2008-04-08')} - 1 + 0 + 1 {(VALUE._col1 = '2008-04-08')} keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((((UDFToDouble(_col0) > 15.0) and (UDFToDouble(_col0) < 25.0)) and (UDFToDouble(_col3) > 10.0)) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + predicate: ((((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) and (UDFToDouble(_col2) > 15.0)) and (UDFToDouble(_col2) < 25.0)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -587,42 +583,42 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -766,48 +762,44 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] + /src [$hdt$_0:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/ppd_gby_join.q.out index 579c827..1acfc3d 100644 --- a/ql/src/test/results/clientpositive/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/ppd_gby_join.q.out @@ -35,30 +35,6 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> '4') (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -83,6 +59,30 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -92,29 +92,25 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -331,32 +327,32 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -367,10 +363,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '50') or (_col1 < '50')) (type: boolean) + predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) + expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/ppd_join.q.out b/ql/src/test/results/clientpositive/ppd_join.q.out index ae5fb27..2186a54 100644 --- a/ql/src/test/results/clientpositive/ppd_join.q.out +++ b/ql/src/test/results/clientpositive/ppd_join.q.out @@ -32,31 +32,6 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> '4') (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -81,6 +56,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -90,24 +90,20 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -583,33 +579,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -620,10 +616,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '50') or (_col2 < '50')) (type: boolean) + predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_join2.q.out b/ql/src/test/results/clientpositive/ppd_join2.q.out index 88624ea..27a8ddf 100644 --- a/ql/src/test/results/clientpositive/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/ppd_join2.q.out @@ -39,51 +39,51 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '305') and (key <> '14')) and (key <> '302')) and (key <> '311')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 <> '14') (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 <> '311') and (((_col1 <> 'val_50') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '302') and ((_col0 <> '311') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 <> '305') and (_col0 <> '14')) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '305') and (key <> '14')) and (key <> '302')) and (key <> '311')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '311') and (((_col1 <> 'val_50') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 <> '14') (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '305') and (_col0 <> '14')) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 <> '302') and ((_col0 <> '311') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -94,26 +94,22 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Filter Operator + predicate: ((_col2 <> '10') or (_col0 <> '10')) (type: boolean) Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col2 <> '10') or (_col0 <> '10')) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -1758,33 +1754,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '305') and (key <> '14')) and (key <> '302')) and (key <> '311')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '305') and (key <> '14')) and (key <> '302')) and (key <> '311')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -1796,10 +1792,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean) + predicate: ((_col2 <> '10') or (_col0 <> '10')) (type: boolean) Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_join3.q.out b/ql/src/test/results/clientpositive/ppd_join3.q.out index 6c5c0da..abc286c 100644 --- a/ql/src/test/results/clientpositive/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/ppd_join3.q.out @@ -39,31 +39,6 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '12') and (key <> '4')) and (key <> '11')) and (key > '0')) and (key < '400')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> '4') (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 <> '11') and ((_col0 > '0') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -88,6 +63,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((key <> '12') and (key <> '4')) and (key <> '11')) and (key > '0')) and (key < '400')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 <> '11') and ((_col0 > '0') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -97,29 +97,25 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 + Filter Operator + predicate: ((_col0 <> '13') and (_col0 <> '1')) (type: boolean) Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '13') and (_col0 <> '1')) (type: boolean) - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -1820,33 +1816,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '12') and (key <> '4')) and (key <> '11')) and (key > '0')) and (key < '400')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((((key <> '12') and (key <> '4')) and (key <> '11')) and (key > '0')) and (key < '400')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1857,10 +1853,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '10') or (_col2 <> '10')) (type: boolean) + predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_outer_join4.q.out b/ql/src/test/results/clientpositive/ppd_outer_join4.q.out index 18e0154..2d1333b 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join4.q.out @@ -27,12 +27,12 @@ EXPLAIN WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -101,10 +101,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -125,13 +132,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -140,18 +140,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -440,12 +436,12 @@ POSTHOOK: query: EXPLAIN WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -496,10 +492,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -514,13 +517,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -529,18 +525,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 3 Data size: 37 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 37 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/ppd_random.q.out b/ql/src/test/results/clientpositive/ppd_random.q.out index 5e23a1c..ff9a812 100644 --- a/ql/src/test/results/clientpositive/ppd_random.q.out +++ b/ql/src/test/results/clientpositive/ppd_random.q.out @@ -31,8 +31,8 @@ STAGE PLANS: predicate: (key > '2') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) @@ -42,7 +42,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -50,8 +49,8 @@ STAGE PLANS: predicate: (key > '2') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) @@ -61,6 +60,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -68,26 +68,22 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (rand() > 0.5) (type: boolean) + Filter Operator + predicate: (rand() > 0.5) (type: boolean) + Statistics: Num rows: 30 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 30 Data size: 319 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 30 Data size: 319 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 319 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -128,15 +124,14 @@ STAGE PLANS: predicate: (key > '2') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -144,14 +139,15 @@ STAGE PLANS: predicate: (key > '2') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -159,26 +155,22 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (rand() > 0.5) (type: boolean) + Filter Operator + predicate: (rand() > 0.5) (type: boolean) + Statistics: Num rows: 60 Data size: 639 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 639 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 60 Data size: 639 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 639 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/rcfile_null_value.q.out b/ql/src/test/results/clientpositive/rcfile_null_value.q.out index a1e6f4f..c90287c 100644 --- a/ql/src/test/results/clientpositive/rcfile_null_value.q.out +++ b/ql/src/test/results/clientpositive/rcfile_null_value.q.out @@ -100,45 +100,45 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/router_join_ppr.q.out b/ql/src/test/results/clientpositive/router_join_ppr.q.out index 70d7542..b7d593f 100644 --- a/ql/src/test/results/clientpositive/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/router_join_ppr.q.out @@ -113,44 +113,44 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col1 (type: string), _col2 (type: string) + value expressions: _col1 (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -384,55 +384,51 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:$hdt$_0:b] + /src [$hdt$_0:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] + /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:b] + /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 filter mappings: - 0 [1, 1] + 1 [0, 1] filter predicates: - 0 {(VALUE._col1 = '2008-04-08')} - 1 + 0 + 1 {(VALUE._col1 = '2008-04-08')} keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -970,42 +966,42 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -1149,48 +1145,44 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] + /src [$hdt$_0:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoin.q.out b/ql/src/test/results/clientpositive/skewjoin.q.out index 1b56d5f..22a9421 100644 --- a/ql/src/test/results/clientpositive/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/skewjoin.q.out @@ -959,15 +959,14 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -975,14 +974,15 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -991,18 +991,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-8 Conditional Operator @@ -1031,17 +1027,13 @@ STAGE PLANS: keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - outputColumnNames: _col1, _col2 - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/auto_join12.q.out b/ql/src/test/results/clientpositive/spark/auto_join12.q.out index 1bddcfc..1f480bb 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join12.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join12.q.out @@ -57,8 +57,8 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -82,8 +82,8 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -91,37 +91,33 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 4 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col3 - input vertices: - 0 Map 1 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col1,_col3) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col1,_col3) (type: int) + Group By Operator + aggregations: sum(_col0) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 3 diff --git a/ql/src/test/results/clientpositive/spark/auto_join5.q.out b/ql/src/test/results/clientpositive/spark/auto_join5.q.out index 08e9ae6..285450f 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join5.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join5.q.out @@ -47,7 +47,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -70,7 +70,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src1 @@ -84,16 +84,16 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 0 Map 1 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join12.q.out b/ql/src/test/results/clientpositive/spark/join12.q.out index b62334d..38b450e 100644 --- a/ql/src/test/results/clientpositive/spark/join12.q.out +++ b/ql/src/test/results/clientpositive/spark/join12.q.out @@ -62,15 +62,14 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -80,14 +79,15 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -117,18 +117,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join5.q.out b/ql/src/test/results/clientpositive/spark/join5.q.out index 1914ad0..66451ab 100644 --- a/ql/src/test/results/clientpositive/spark/join5.q.out +++ b/ql/src/test/results/clientpositive/spark/join5.q.out @@ -58,17 +58,17 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Map 3 Map Operator Tree: @@ -76,30 +76,30 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out index a18d82e..3b3358f 100644 --- a/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out +++ b/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out @@ -26,49 +26,37 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -76,9 +64,9 @@ STAGE PLANS: Inner Join 0 to 1 Inner Join 0 to 2 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - 2 _col0 (type: string), _col1 (type: string) + 0 key (type: string), hr (type: string) + 1 key (type: string), hr (type: string) + 2 key (type: string), hr (type: string) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() diff --git a/ql/src/test/results/clientpositive/spark/join_merging.q.out b/ql/src/test/results/clientpositive/spark/join_merging.q.out index ce26543..6c5089b 100644 --- a/ql/src/test/results/clientpositive/spark/join_merging.q.out +++ b/ql/src/test/results/clientpositive/spark/join_merging.q.out @@ -16,8 +16,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -25,21 +24,6 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 4 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_size > 10) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -53,7 +37,22 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Map 5 + Map 3 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Map 4 Map Operator Tree: TableScan alias: p1 @@ -71,39 +70,21 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 + Right Outer Join1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 2 _col0 (type: int) outputColumnNames: _col1, _col3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -142,21 +123,6 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 4 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_size > 10) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -170,6 +136,21 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Map 5 Map Operator Tree: TableScan @@ -188,25 +169,21 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 > (_col1 + 10)) (type: boolean) + predicate: (_col1 > (_col3 + 10)) (type: boolean) Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int) + value expressions: _col1 (type: int), _col3 (type: int) Reducer 3 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index 7101beb..d3a0bda 100644 --- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -883,10 +883,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -911,7 +911,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -942,32 +942,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 - value expressions: _col0 (type: string), _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -975,23 +961,19 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + Limit + Number of rows: 4 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 4 + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1000,16 +982,30 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out index 190485f..c3eeb23 100644 --- a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out @@ -118,22 +118,22 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -142,12 +142,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -155,13 +152,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -171,29 +166,59 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -232,39 +257,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -272,11 +272,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -286,66 +288,60 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] Reducer 2 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 15.0) and (UDFToDouble(_col0) < 25.0)) (type: boolean) + predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -999,25 +995,88 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1113,39 +1172,60 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: #### A masked pattern was here #### - Path -> Partition: + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1153,11 +1233,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1167,62 +1249,66 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] Reducer 2 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Outer Join0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col7 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (((_col5 > 15) and (_col5 < 25)) and (_col7 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1242,6 +1328,8 @@ PREHOOK: Input: default@src PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: FROM src a @@ -1255,6 +1343,8 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 17 val_17 17 val_17 17 val_17 17 val_17 diff --git a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out index 0614cb9..588d2ed 100644 --- a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out +++ b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out @@ -120,32 +120,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -153,13 +146,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -169,29 +160,51 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -232,12 +245,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 - hr 11 + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -278,12 +291,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -322,37 +335,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -360,11 +350,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -374,26 +366,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -401,21 +393,21 @@ STAGE PLANS: condition map: Outer Join 0 to 1 filter mappings: - 0 [1, 1] + 1 [0, 1] filter predicates: - 0 {(VALUE._col1 = '2008-04-08')} - 1 + 0 + 1 {(VALUE._col1 = '2008-04-08')} keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((((UDFToDouble(_col0) > 15.0) and (UDFToDouble(_col0) < 25.0)) and (UDFToDouble(_col3) > 10.0)) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + predicate: ((((_col5 > 15) and (_col5 < 25)) and (_col0 > 10)) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -604,22 +596,22 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -628,12 +620,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -641,13 +630,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -657,29 +644,59 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -718,39 +735,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -758,11 +750,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -772,66 +766,60 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] Reducer 2 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out index 31b25b3..b3ebea9 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out @@ -40,32 +40,6 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> '4') (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -90,6 +64,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -100,29 +100,25 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -334,34 +330,34 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -373,10 +369,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '50') or (_col1 < '50')) (type: boolean) + predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) + expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_join.q.out index b97431c..42a83f3 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join.q.out @@ -37,33 +37,6 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> '4') (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -88,6 +61,33 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -98,24 +98,20 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -596,35 +592,35 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -636,10 +632,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '50') or (_col2 < '50')) (type: boolean) + predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out index cf81423..a98ab13 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -71,26 +71,26 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '305') and (key <> '14')) and (key <> '302')) and (key <> '311')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 <> '14') (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 <> '311') and (((_col1 <> 'val_50') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '302') and ((_col0 <> '311') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 <> '305') and (_col0 <> '14')) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Map 5 Map Operator Tree: @@ -98,26 +98,26 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '305') and (key <> '14')) and (key <> '302')) and (key <> '311')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '311') and (((_col1 <> 'val_50') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 <> '14') (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '305') and (_col0 <> '14')) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 <> '302') and ((_col0 <> '311') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -150,26 +150,22 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Filter Operator + predicate: ((_col2 <> '10') or (_col0 <> '10')) (type: boolean) Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col2 <> '10') or (_col0 <> '10')) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col3 (type: string) Stage: Stage-0 Fetch Operator @@ -1780,17 +1776,17 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '305') and (key <> '14')) and (key <> '302')) and (key <> '311')) and (key < '400')) and key is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Map 5 Map Operator Tree: @@ -1798,17 +1794,17 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '305') and (key <> '14')) and (key <> '302')) and (key <> '311')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -1842,10 +1838,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean) + predicate: ((_col2 <> '10') or (_col0 <> '10')) (type: boolean) Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out index d2343c4..9b5e0af 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out @@ -70,33 +70,6 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '12') and (key <> '4')) and (key <> '11')) and (key > '0')) and (key < '400')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> '4') (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 <> '11') and ((_col0 > '0') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -121,6 +94,33 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((key <> '12') and (key <> '4')) and (key <> '11')) and (key > '0')) and (key < '400')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 <> '11') and ((_col0 > '0') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -152,29 +152,25 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 + Filter Operator + predicate: ((_col0 <> '13') and (_col0 <> '1')) (type: boolean) Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '13') and (_col0 <> '1')) (type: boolean) - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator @@ -1842,35 +1838,35 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '12') and (key <> '4')) and (key <> '11')) and (key > '0')) and (key < '400')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((((key <> '12') and (key <> '4')) and (key <> '11')) and (key > '0')) and (key < '400')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -1903,10 +1899,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '10') or (_col2 <> '10')) (type: boolean) + predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out index 2abca78..db5914c 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out @@ -34,8 +34,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -44,14 +44,14 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean) + predicate: ((((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '10') and ((_col0 < '20') and ((_col0 > '15') and (_col0 < '25')))) (type: boolean) + predicate: ((_col0 > '15') and (_col0 < '25')) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) @@ -61,20 +61,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Map 3 + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: ((((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '15') and (_col0 < '25')) (type: boolean) + predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) @@ -91,14 +92,14 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: ((((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) + predicate: ((_col0 > '10') and ((_col0 < '20') and ((_col0 > '15') and (_col0 < '25')))) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) @@ -108,7 +109,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -117,27 +117,6 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -152,6 +131,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -447,8 +443,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -457,24 +453,25 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean) + predicate: ((((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Map 3 + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: ((((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -492,18 +489,17 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: ((((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -512,20 +508,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -533,14 +524,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 37 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 37 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out index 12f1abb..268d482 100644 --- a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out @@ -118,36 +118,33 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col1 (type: string), _col2 (type: string) + value expressions: _col1 (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -155,13 +152,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -171,29 +166,59 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string), _col2 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -234,12 +259,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 - hr 11 + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -280,12 +305,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -324,41 +349,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -366,11 +364,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -380,71 +380,67 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] Reducer 2 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 filter mappings: - 0 [1, 1] + 1 [0, 1] filter predicates: - 0 {(VALUE._col1 = '2008-04-08')} - 1 + 0 + 1 {(VALUE._col1 = '2008-04-08')} keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -999,22 +995,22 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -1023,12 +1019,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1036,13 +1029,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1052,29 +1043,59 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1113,39 +1134,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1153,11 +1149,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1167,66 +1165,60 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] Reducer 2 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/skewjoin.q.out b/ql/src/test/results/clientpositive/spark/skewjoin.q.out index ec74786..828a64f 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoin.q.out @@ -1031,15 +1031,14 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 6 Map Operator Tree: TableScan @@ -1049,14 +1048,15 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 5 Reduce Operator Tree: Join Operator @@ -1066,18 +1066,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -1109,17 +1105,13 @@ STAGE PLANS: keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - outputColumnNames: _col1, _col2 - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out index 9f93574..1c49f52 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -498,46 +498,46 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 - File Output Operator [FS_47] + Reducer 7 + File Output Operator [FS_45] compressed:false Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Select Operator [SEL_46] + Select Operator [SEL_44] | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_45] + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_43] key expressions:(UDFToLong(_col0) + _col1) (type: bigint), _col1 (type: bigint) sort order:-+ Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col0 (type: int), _col2 (type: bigint) - Group By Operator [GBY_43] + Group By Operator [GBY_41] | aggregations:["count(VALUE._col0)"] | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 2 [SIMPLE_EDGE] - Reduce Output Operator [RS_42] + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_40] key expressions:_col0 (type: int), _col1 (type: bigint) Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) sort order:++ Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col2 (type: bigint) - Group By Operator [GBY_41] + Group By Operator [GBY_39] aggregations:["count()"] keys:_col0 (type: int), _col1 (type: bigint) outputColumnNames:["_col0","_col1","_col2"] @@ -545,137 +545,137 @@ Stage-0 Select Operator [SEL_37] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_53] - predicate:((_col3 > 0) or (_col1 >= 0)) (type: boolean) + Filter Operator [FIL_36] + predicate:((_col1 > 0) or (_col6 >= 0)) (type: boolean) Statistics:Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_59] + Merge Join Operator [MERGEJOIN_55] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col3","_col4"] + | outputColumnNames:["_col1","_col2","_col6"] | Statistics:Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_33] + |<-Map 11 [SIMPLE_EDGE] + | Reduce Output Operator [RS_34] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE | value expressions:_col1 (type: int) - | Select Operator [SEL_1] + | Select Operator [SEL_30] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_54] + | Filter Operator [FIL_53] | predicate:key is not null (type: boolean) | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_0] + | TableScan [TS_29] | alias:cbo_t3 | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_35] + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_32] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col1 (type: int), _col2 (type: bigint) - Select Operator [SEL_29] + Select Operator [SEL_28] outputColumnNames:["_col0","_col1","_col2"] - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_55] - predicate:((_col3 + _col1) >= 0) (type: boolean) + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_27] + predicate:((_col1 + _col4) >= 0) (type: boolean) Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_58] + Merge Join Operator [MERGEJOIN_54] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3","_col4"] + | outputColumnNames:["_col0","_col1","_col2","_col4"] | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 11 [SIMPLE_EDGE] - | Reduce Output Operator [RS_27] + |<-Reducer 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_25] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col1 (type: int), _col2 (type: bigint) - | Select Operator [SEL_22] - | | outputColumnNames:["_col0","_col1","_col2"] - | | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Reducer 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_21] - | key expressions:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col1 (type: int), _col2 (type: bigint) - | Select Operator [SEL_20] - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_19] + | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_20] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_19] + | key expressions:_col3 (type: double), _col2 (type: bigint) + | sort order:-+ + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: int) + | Select Operator [SEL_18] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_17] | | aggregations:["sum(VALUE._col0)"] | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) | | outputColumnNames:["_col0","_col1","_col2","_col3"] | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Map 9 [SIMPLE_EDGE] - | Reduce Output Operator [RS_18] + | |<-Map 8 [SIMPLE_EDGE] + | Reduce Output Operator [RS_16] | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) | sort order:+++ | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE | value expressions:_col3 (type: bigint) - | Group By Operator [GBY_17] + | Group By Operator [GBY_15] | aggregations:["sum(_col1)"] | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3"] | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_15] + | Select Operator [SEL_13] | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_57] + | Filter Operator [FIL_52] | predicate:((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) (type: boolean) | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_13] - | alias:cbo_t1 + | TableScan [TS_11] + | alias:cbo_t2 | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 7 [SIMPLE_EDGE] - Reduce Output Operator [RS_25] + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_23] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col1 (type: int) - Select Operator [SEL_11] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 6 [SIMPLE_EDGE] - Reduce Output Operator [RS_10] - key expressions:_col3 (type: double), _col2 (type: bigint) - sort order:-+ - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col0 (type: string), _col1 (type: int) - Select Operator [SEL_9] - outputColumnNames:["_col0","_col1","_col2","_col3"] - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_8] + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_9] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + key expressions:_col0 (type: string) + sort order:+ + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_6] | aggregations:["sum(VALUE._col0)"] | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3"] | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 5 [SIMPLE_EDGE] - Reduce Output Operator [RS_7] + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) sort order:+++ Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col3 (type: bigint) - Group By Operator [GBY_6] + Group By Operator [GBY_4] aggregations:["sum(_col1)"] keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3"] Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_4] + Select Operator [SEL_2] outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_56] + Filter Operator [FIL_51] predicate:((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) (type: boolean) Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_2] - alias:cbo_t2 + TableScan [TS_0] + alias:cbo_t1 Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc) cbo_t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p left outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int % c asc, cbo_t3.c_int desc @@ -686,168 +686,168 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 5 <- Map 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 - File Output Operator [FS_45] + Reducer 7 + File Output Operator [FS_44] compressed:false Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Select Operator [SEL_44] + Select Operator [SEL_43] | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 5 [SIMPLE_EDGE] - Reduce Output Operator [RS_43] + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_42] key expressions:(UDFToLong(_col0) % _col1) (type: bigint), _col0 (type: int) sort order:+- Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col1 (type: bigint), _col2 (type: bigint) - Group By Operator [GBY_41] + Group By Operator [GBY_40] | aggregations:["count(VALUE._col0)"] | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_40] + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_39] key expressions:_col0 (type: int), _col1 (type: bigint) Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) sort order:++ Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col2 (type: bigint) - Group By Operator [GBY_39] + Group By Operator [GBY_38] aggregations:["count()"] keys:_col0 (type: int), _col1 (type: bigint) outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_35] + Select Operator [SEL_34] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_49] + Filter Operator [FIL_48] predicate:((((_col6 > 0) and ((_col6 >= 1) or (_col2 >= 1))) and ((UDFToLong(_col6) + _col2) >= 0)) and ((_col1 > 0) or (_col6 >= 0))) (type: boolean) Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_55] + Merge Join Operator [MERGEJOIN_53] | condition map:[{"":"Left Outer Join0 to 1"}] | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} | outputColumnNames:["_col1","_col2","_col6"] | Statistics:Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE |<-Map 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_32] + | Reduce Output Operator [RS_31] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE | value expressions:_col1 (type: int) - | Select Operator [SEL_30] + | Select Operator [SEL_29] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_29] + | TableScan [TS_28] | alias:cbo_t3 | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_31] + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_30] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col1 (type: int), _col2 (type: bigint) - Select Operator [SEL_26] + Select Operator [SEL_27] outputColumnNames:["_col0","_col1","_col2"] - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_50] - predicate:((_col3 + _col1) >= 0) (type: boolean) + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_26] + predicate:((_col1 + _col4) >= 0) (type: boolean) Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_54] - | condition map:[{"":"Right Outer Join0 to 1"}] + Merge Join Operator [MERGEJOIN_52] + | condition map:[{"":"Left Outer Join0 to 1"}] | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3","_col4"] + | outputColumnNames:["_col0","_col1","_col2","_col4"] | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 2 [SIMPLE_EDGE] + |<-Reducer 3 [SIMPLE_EDGE] | Reduce Output Operator [RS_23] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col1 (type: int) - | Select Operator [SEL_9] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_8] - | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | | outputColumnNames:["_col0","_col1","_col2"] - | | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_7] - | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | sort order:+++ - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_6] - | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_2] - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_51] - | predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_0] - | alias:cbo_t2 - | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_11] + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_10] + | key expressions:_col3 (type: bigint), _col1 (type: int) + | sort order:+- + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col2 (type: bigint) + | Select Operator [SEL_9] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_8] + | | aggregations:["sum(VALUE._col0)"] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col3 (type: bigint) + | Group By Operator [GBY_6] + | aggregations:["sum(_col1)"] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_49] + | predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE |<-Reducer 9 [SIMPLE_EDGE] Reduce Output Operator [RS_24] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col1 (type: int), _col2 (type: bigint) - Select Operator [SEL_21] - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_20] - key expressions:_col3 (type: bigint), _col1 (type: int) - sort order:+- - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col0 (type: string), _col2 (type: bigint) - Select Operator [SEL_19] - outputColumnNames:["_col0","_col1","_col2","_col3"] - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_18] - | aggregations:["sum(VALUE._col0)"] - | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2","_col3"] - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 7 [SIMPLE_EDGE] - Reduce Output Operator [RS_17] - key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) - Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) - sort order:+++ - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col3 (type: bigint) - Group By Operator [GBY_16] - aggregations:["sum(_col1)"] - keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) - outputColumnNames:["_col0","_col1","_col2","_col3"] - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_12] - outputColumnNames:["_col0","_col1","_col2"] - Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_52] - predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) - Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_10] - alias:cbo_t1 - Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_22] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_21] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_20] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_19] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_15] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_50] + predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_13] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b+c, a desc) cbo_t1 right outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 2) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: type: QUERY @@ -857,155 +857,139 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 8 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 5 - File Output Operator [FS_39] + File Output Operator [FS_36] compressed:false Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Group By Operator [GBY_37] + Group By Operator [GBY_34] | aggregations:["count(VALUE._col0)"] | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE |<-Reducer 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_36] + Reduce Output Operator [RS_33] key expressions:_col0 (type: int), _col1 (type: bigint) Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) sort order:++ Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col2 (type: bigint) - Group By Operator [GBY_35] + Group By Operator [GBY_32] aggregations:["count()"] keys:_col0 (type: int), _col1 (type: bigint) outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_33] + Select Operator [SEL_30] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_43] + Filter Operator [FIL_29] predicate:(((_col1 + _col4) >= 2) and ((_col1 > 0) or (_col6 >= 0))) (type: boolean) Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_48] - | condition map:[{"":"Right Outer Join0 to 1"}] - | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + Merge Join Operator [MERGEJOIN_41] + | condition map:[{"":"Right Outer Join0 to 1"},{"":"Right Outer Join0 to 2"}] + | keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"} | outputColumnNames:["_col1","_col2","_col4","_col6"] | Statistics:Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 9 [SIMPLE_EDGE] - | Reduce Output Operator [RS_30] + |<-Map 8 [SIMPLE_EDGE] + | Reduce Output Operator [RS_27] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE | value expressions:_col1 (type: int) - | Select Operator [SEL_28] + | Select Operator [SEL_24] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_27] + | TableScan [TS_23] | alias:cbo_t3 | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE |<-Reducer 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_29] + | Reduce Output Operator [RS_25] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_11] + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_10] + | key expressions:_col3 (type: bigint), _col0 (type: string) + | sort order:+- + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_9] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_8] + | | aggregations:["sum(VALUE._col0)"] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col3 (type: bigint) + | Group By Operator [GBY_6] + | aggregations:["sum(_col1)"] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_39] + | predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_26] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col1 (type: int), _col2 (type: bigint), _col4 (type: int) - Select Operator [SEL_26] - outputColumnNames:["_col0","_col1","_col2","_col4"] - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_47] - | condition map:[{"":"Left Outer Join0 to 1"}] - | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3","_col4"] - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 2 [SIMPLE_EDGE] - | Reduce Output Operator [RS_23] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col1 (type: int) - | Select Operator [SEL_9] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_8] - | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | | outputColumnNames:["_col0","_col1","_col2"] - | | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_7] - | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | sort order:+++ - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_6] - | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_2] - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_45] - | predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_0] - | alias:cbo_t2 - | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_24] - key expressions:_col0 (type: string) - Map-reduce partition columns:_col0 (type: string) - sort order:+ - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col1 (type: int), _col2 (type: bigint) - Select Operator [SEL_21] - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 7 [SIMPLE_EDGE] - Reduce Output Operator [RS_20] - key expressions:_col3 (type: bigint), _col0 (type: string) - sort order:+- - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col1 (type: int), _col2 (type: bigint) - Select Operator [SEL_19] - outputColumnNames:["_col0","_col1","_col2","_col3"] - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_18] - | aggregations:["sum(VALUE._col0)"] - | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2","_col3"] - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 6 [SIMPLE_EDGE] - Reduce Output Operator [RS_17] - key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) - Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) - sort order:+++ - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col3 (type: bigint) - Group By Operator [GBY_16] - aggregations:["sum(_col1)"] - keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) - outputColumnNames:["_col0","_col1","_col2","_col3"] - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_12] - outputColumnNames:["_col0","_col1","_col2"] - Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_46] - predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) - Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_10] - alias:cbo_t1 - Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_22] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_21] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_20] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_19] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_15] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_40] + predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_13] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int PREHOOK: type: QUERY @@ -1014,179 +998,163 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 - File Output Operator [FS_46] + Reducer 6 + File Output Operator [FS_43] compressed:false Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Select Operator [SEL_45] + Select Operator [SEL_42] | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 6 [SIMPLE_EDGE] - Reduce Output Operator [RS_44] + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_41] key expressions:_col0 (type: int) sort order:+ Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col1 (type: bigint), _col2 (type: bigint) - Group By Operator [GBY_42] + Group By Operator [GBY_39] | aggregations:["count(VALUE._col0)"] | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 5 [SIMPLE_EDGE] - Reduce Output Operator [RS_41] + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_38] key expressions:_col0 (type: int), _col1 (type: bigint) Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) sort order:++ Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col2 (type: bigint) - Group By Operator [GBY_40] + Group By Operator [GBY_37] aggregations:["count()"] keys:_col0 (type: int), _col1 (type: bigint) outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_36] + Select Operator [SEL_33] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_48] + Filter Operator [FIL_45] predicate:(((((_col6 > 0) and ((_col6 >= 1) or (_col2 >= 1))) and ((UDFToLong(_col6) + _col2) >= 0)) and ((_col1 + _col4) >= 0)) and ((_col1 > 0) or (_col6 >= 0))) (type: boolean) Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_52] - | condition map:[{"":"Outer Join 0 to 1"}] - | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + Merge Join Operator [MERGEJOIN_48] + | condition map:[{"":"Outer Join 0 to 1"},{"":"Outer Join 0 to 2"}] + | keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"} | outputColumnNames:["_col1","_col2","_col4","_col6"] | Statistics:Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 11 [SIMPLE_EDGE] - | Reduce Output Operator [RS_33] + |<-Map 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_30] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE | value expressions:_col1 (type: int) - | Select Operator [SEL_31] + | Select Operator [SEL_27] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_30] + | TableScan [TS_26] | alias:cbo_t3 | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_32] + |<-Reducer 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_28] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_11] + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_10] + | key expressions:_col3 (type: double) + | sort order:- + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_9] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_8] + | | aggregations:["sum(VALUE._col0)"] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col3 (type: bigint) + | Group By Operator [GBY_6] + | aggregations:["sum(_col1)"] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_46] + | predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_29] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col1 (type: int), _col2 (type: bigint), _col4 (type: int) - Select Operator [SEL_29] - outputColumnNames:["_col0","_col1","_col2","_col4"] - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_51] - | condition map:[{"":"Outer Join 0 to 1"}] - | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3","_col4"] - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_27] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col1 (type: int), _col2 (type: bigint) - | Select Operator [SEL_24] - | | outputColumnNames:["_col0","_col1","_col2"] - | | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Reducer 9 [SIMPLE_EDGE] - | Reduce Output Operator [RS_23] - | key expressions:_col3 (type: double) - | sort order:- - | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col0 (type: string), _col1 (type: int), _col2 (type: bigint) - | Select Operator [SEL_22] - | outputColumnNames:["_col0","_col1","_col2","_col3"] - | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_21] - | | aggregations:["sum(VALUE._col0)"] - | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | | outputColumnNames:["_col0","_col1","_col2","_col3"] - | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Map 8 [SIMPLE_EDGE] - | Reduce Output Operator [RS_20] - | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | sort order:+++ - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col3 (type: bigint) - | Group By Operator [GBY_19] - | aggregations:["sum(_col1)"] - | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2","_col3"] - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_15] - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_50] - | predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_13] - | alias:cbo_t1 - | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_26] - key expressions:_col0 (type: string) - Map-reduce partition columns:_col0 (type: string) - sort order:+ - Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col1 (type: int) - Select Operator [SEL_11] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 2 [SIMPLE_EDGE] - Reduce Output Operator [RS_10] - key expressions:_col3 (type: double), _col2 (type: bigint) - sort order:-+ - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col0 (type: string), _col1 (type: int) - Select Operator [SEL_9] + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_24] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_23] + key expressions:_col3 (type: double), _col2 (type: bigint) + sort order:-+ + Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string), _col1 (type: int) + Select Operator [SEL_22] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_21] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_20] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_19] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3"] - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_8] - | aggregations:["sum(VALUE._col0)"] - | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2","_col3"] - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 1 [SIMPLE_EDGE] - Reduce Output Operator [RS_7] - key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) - Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) - sort order:+++ - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col3 (type: bigint) - Group By Operator [GBY_6] - aggregations:["sum(_col1)"] - keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) - outputColumnNames:["_col0","_col1","_col2","_col3"] - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_2] - outputColumnNames:["_col0","_col1","_col2"] - Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_49] - predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) - Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_0] - alias:cbo_t2 - Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_15] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_47] + predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_13] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: type: QUERY @@ -1195,34 +1163,34 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 5 <- Map 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 4 <- Map 8 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 - File Output Operator [FS_43] + Reducer 5 + File Output Operator [FS_41] compressed:false Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Group By Operator [GBY_41] + Group By Operator [GBY_39] | aggregations:["count(VALUE._col0)"] | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 2 [SIMPLE_EDGE] - Reduce Output Operator [RS_40] + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_38] key expressions:_col0 (type: int), _col1 (type: bigint) Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) sort order:++ Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col2 (type: bigint) - Group By Operator [GBY_39] + Group By Operator [GBY_37] aggregations:["count()"] keys:_col0 (type: int), _col1 (type: bigint) outputColumnNames:["_col0","_col1","_col2"] @@ -1230,116 +1198,116 @@ Stage-0 Select Operator [SEL_35] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_49] - predicate:((_col3 > 0) or (_col1 >= 0)) (type: boolean) + Filter Operator [FIL_34] + predicate:((_col1 > 0) or (_col6 >= 0)) (type: boolean) Statistics:Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_55] + Merge Join Operator [MERGEJOIN_51] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col3","_col4"] + | outputColumnNames:["_col1","_col2","_col6"] | Statistics:Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_31] + |<-Map 8 [SIMPLE_EDGE] + | Reduce Output Operator [RS_32] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE | value expressions:_col1 (type: int) - | Select Operator [SEL_1] + | Select Operator [SEL_28] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_50] + | Filter Operator [FIL_49] | predicate:key is not null (type: boolean) | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_0] + | TableScan [TS_27] | alias:cbo_t3 | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 6 [SIMPLE_EDGE] - Reduce Output Operator [RS_33] + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_30] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col1 (type: int), _col2 (type: bigint) - Select Operator [SEL_27] + Select Operator [SEL_26] outputColumnNames:["_col0","_col1","_col2"] - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_51] - predicate:((_col3 + _col1) >= 0) (type: boolean) + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_25] + predicate:((_col1 + _col4) >= 0) (type: boolean) Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_54] + Merge Join Operator [MERGEJOIN_50] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3","_col4"] + | outputColumnNames:["_col0","_col1","_col2","_col4"] | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 5 [SIMPLE_EDGE] - | Reduce Output Operator [RS_23] + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_21] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col1 (type: int) - | Select Operator [SEL_11] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_10] + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_9] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_8] + | | aggregations:["sum(VALUE._col0)"] | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | | outputColumnNames:["_col0","_col1","_col2"] - | | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Map 4 [SIMPLE_EDGE] - | Reduce Output Operator [RS_9] + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) | sort order:+++ - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_8] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col3 (type: bigint) + | Group By Operator [GBY_6] + | aggregations:["sum(_col1)"] | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_4] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_52] + | Filter Operator [FIL_47] | predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean) | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_2] - | alias:cbo_t2 + | TableScan [TS_0] + | alias:cbo_t1 | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_25] + |<-Reducer 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_23] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col1 (type: int), _col2 (type: bigint) - Select Operator [SEL_21] - outputColumnNames:["_col0","_col1","_col2"] - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_20] - | aggregations:["sum(VALUE._col0)"] + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_19] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_18] | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2","_col3"] - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 7 [SIMPLE_EDGE] - Reduce Output Operator [RS_19] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_17] key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) sort order:+++ - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col3 (type: bigint) - Group By Operator [GBY_18] - aggregations:["sum(_col1)"] + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_16] keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) - outputColumnNames:["_col0","_col1","_col2","_col3"] - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_14] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_12] outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_53] + Filter Operator [FIL_48] predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean) Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_12] - alias:cbo_t1 + TableScan [TS_10] + alias:cbo_t2 Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc @@ -2387,49 +2355,49 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:5 Stage-1 - Reducer 4 - File Output Operator [FS_50] + Reducer 7 + File Output Operator [FS_48] compressed:false Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Limit [LIM_49] + Limit [LIM_47] Number of rows:5 Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_48] + Select Operator [SEL_46] | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_47] + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_45] key expressions:(UDFToLong(_col0) + _col1) (type: bigint), _col1 (type: bigint) sort order:-+ Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col0 (type: int), _col2 (type: bigint) - Group By Operator [GBY_45] + Group By Operator [GBY_43] | aggregations:["count(VALUE._col0)"] | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 2 [SIMPLE_EDGE] - Reduce Output Operator [RS_44] + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_42] key expressions:_col0 (type: int), _col1 (type: bigint) Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) sort order:++ Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col2 (type: bigint) - Group By Operator [GBY_43] + Group By Operator [GBY_41] aggregations:["count()"] keys:_col0 (type: int), _col1 (type: bigint) outputColumnNames:["_col0","_col1","_col2"] @@ -2437,149 +2405,149 @@ Stage-0 Select Operator [SEL_39] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_56] - predicate:((_col3 > 0) or (_col1 >= 0)) (type: boolean) + Filter Operator [FIL_38] + predicate:((_col1 > 0) or (_col6 >= 0)) (type: boolean) Statistics:Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_64] + Merge Join Operator [MERGEJOIN_60] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col3","_col4"] + | outputColumnNames:["_col1","_col2","_col6"] | Statistics:Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_35] + |<-Map 11 [SIMPLE_EDGE] + | Reduce Output Operator [RS_36] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE | value expressions:_col1 (type: int) - | Select Operator [SEL_1] + | Select Operator [SEL_32] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_57] + | Filter Operator [FIL_58] | predicate:key is not null (type: boolean) | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_0] + | TableScan [TS_31] | alias:cbo_t3 | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_37] + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_34] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col1 (type: int), _col2 (type: bigint) - Select Operator [SEL_31] + Select Operator [SEL_30] outputColumnNames:["_col0","_col1","_col2"] - Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_58] - predicate:((_col3 + _col1) >= 0) (type: boolean) + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_29] + predicate:((_col1 + _col4) >= 0) (type: boolean) Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_63] + Merge Join Operator [MERGEJOIN_59] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3","_col4"] + | outputColumnNames:["_col0","_col1","_col2","_col4"] | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 11 [SIMPLE_EDGE] - | Reduce Output Operator [RS_29] + |<-Reducer 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_27] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col1 (type: int), _col2 (type: bigint) - | Filter Operator [FIL_61] + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Filter Operator [FIL_56] | predicate:_col0 is not null (type: boolean) - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | Limit [LIM_24] + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | Limit [LIM_22] | Number of rows:5 - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_23] - | | outputColumnNames:["_col0","_col1","_col2"] - | | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Reducer 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_22] - | key expressions:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col1 (type: int), _col2 (type: bigint) - | Select Operator [SEL_21] - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_20] + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_21] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_20] + | key expressions:_col3 (type: double), _col2 (type: bigint) + | sort order:-+ + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: int) + | Select Operator [SEL_19] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_18] | | aggregations:["sum(VALUE._col0)"] | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) | | outputColumnNames:["_col0","_col1","_col2","_col3"] | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Map 9 [SIMPLE_EDGE] - | Reduce Output Operator [RS_19] + | |<-Map 8 [SIMPLE_EDGE] + | Reduce Output Operator [RS_17] | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) | sort order:+++ | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE | value expressions:_col3 (type: bigint) - | Group By Operator [GBY_18] + | Group By Operator [GBY_16] | aggregations:["sum(_col1)"] | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3"] | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_16] + | Select Operator [SEL_14] | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_62] + | Filter Operator [FIL_57] | predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) (type: boolean) | Statistics:Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_14] - | alias:cbo_t1 + | TableScan [TS_12] + | alias:cbo_t2 | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 7 [SIMPLE_EDGE] - Reduce Output Operator [RS_27] + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_25] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col1 (type: int) - Filter Operator [FIL_59] + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: bigint) + Filter Operator [FIL_54] predicate:_col0 is not null (type: boolean) - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - Limit [LIM_12] + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Limit [LIM_10] Number of rows:5 - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_11] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 6 [SIMPLE_EDGE] - Reduce Output Operator [RS_10] - key expressions:_col3 (type: double), _col2 (type: bigint) - sort order:-+ - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col0 (type: string), _col1 (type: int) - Select Operator [SEL_9] - outputColumnNames:["_col0","_col1","_col2","_col3"] - Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_8] + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_9] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + key expressions:_col0 (type: string) + sort order:+ + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_6] | aggregations:["sum(VALUE._col0)"] | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3"] | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 5 [SIMPLE_EDGE] - Reduce Output Operator [RS_7] + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) sort order:+++ Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col3 (type: bigint) - Group By Operator [GBY_6] + Group By Operator [GBY_4] aggregations:["sum(_col1)"] keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3"] Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_4] + Select Operator [SEL_2] outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_60] + Filter Operator [FIL_55] predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) (type: boolean) Statistics:Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_2] - alias:cbo_t2 + TableScan [TS_0] + alias:cbo_t1 Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) diff --git a/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out b/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out index 2a41aae..01ccae7 100644 --- a/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out @@ -887,9 +887,9 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -914,7 +914,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -945,32 +945,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 - value expressions: _col0 (type: string), _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Merge Join Operator condition map: @@ -978,23 +964,19 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + Limit + Number of rows: 4 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 4 + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1003,16 +985,30 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/tez/mrr.q.out b/ql/src/test/results/clientpositive/tez/mrr.q.out index d42f9b0..efbd02d 100644 --- a/ql/src/test/results/clientpositive/tez/mrr.q.out +++ b/ql/src/test/results/clientpositive/tez/mrr.q.out @@ -1679,7 +1679,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 4 (BROADCAST_EDGE) + Map 3 <- Reducer 2 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -1696,26 +1696,18 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col3 (type: string) + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -1728,33 +1720,24 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: string), _col3 (type: string) Reducer 2 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) @@ -1767,6 +1750,19 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/skewjoin.q.out b/ql/src/test/results/clientpositive/tez/skewjoin.q.out index ec368f9..7d0a8e5 100644 --- a/ql/src/test/results/clientpositive/tez/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/tez/skewjoin.q.out @@ -858,15 +858,14 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 6 Map Operator Tree: TableScan @@ -876,14 +875,15 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -929,18 +929,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_union.q.out b/ql/src/test/results/clientpositive/tez/tez_union.q.out index 4012b90..5a7d0d6 100644 --- a/ql/src/test/results/clientpositive/tez/tez_union.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_union.q.out @@ -330,8 +330,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 3 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 2 <- Map 1 (BROADCAST_EDGE), Union 3 (CONTAINS) + Map 4 <- Map 1 (BROADCAST_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -346,29 +346,17 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 2 Map Operator Tree: TableScan alias: s1 @@ -388,20 +376,16 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 4 Map Operator Tree: TableScan @@ -414,18 +398,26 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Union 2 - Vertex: Union 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator