commit 166f2f051f6f276654e13f769012121cc0dd91c8 Author: Ashutosh Chauhan Date: Wed Jul 1 16:45:01 2015 -0700 HIVE-11110 : Enable HiveJoinAddNotNullRule in CBO diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java index a4484ec..37ff8cd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java @@ -30,12 +30,11 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -76,8 +75,13 @@ public void onMatch(RelOptRuleCall call) { final Join join = call.rel(0); RelNode leftInput = call.rel(1); RelNode rightInput = call.rel(2); + boolean genLeftNotNullPred = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.RIGHT); + boolean genRightNotNullPred = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.LEFT); - if (join.getJoinType() != JoinRelType.INNER) { + // TODO: 1. NULL Safe joins shouldn't push "is not null" predicate + // Currently CBO doesn't support NULL-SAFE predicates/join + // 2. Enable it for outer join (remove != INNER check below) + if (join.getJoinType() != JoinRelType.INNER /*!genLeftNotNullPred && !genRightNotNullPred && */) { return; } @@ -101,10 +105,10 @@ public void onMatch(RelOptRuleCall call) { final RelOptCluster cluster = join.getCluster(); final RexBuilder rexBuilder = join.getCluster().getRexBuilder(); - final Map newLeftConditions = getNotNullConditions(cluster, - rexBuilder, leftInput, joinLeftKeyPositions); - final Map newRightConditions = getNotNullConditions(cluster, - rexBuilder, rightInput, joinRightKeyPositions); + final Map newLeftConditions = genLeftNotNullPred ? getNotNullConditions(cluster, + rexBuilder, leftInput, joinLeftKeyPositions) : null; + final Map newRightConditions = genRightNotNullPred ? getNotNullConditions(cluster, + rexBuilder, rightInput, joinRightKeyPositions) : null; // Nothing will be added to the expression if (newLeftConditions == null && newRightConditions == null) { @@ -112,16 +116,10 @@ public void onMatch(RelOptRuleCall call) { } if (newLeftConditions != null) { - if (leftInput instanceof HiveFilter) { - leftInput = leftInput.getInput(0); - } leftInput = createHiveFilterConjunctiveCondition(filterFactory, rexBuilder, leftInput, newLeftConditions.values()); } if (newRightConditions != null) { - if (rightInput instanceof HiveFilter) { - rightInput = rightInput.getInput(0); - } rightInput = createHiveFilterConjunctiveCondition(filterFactory, rexBuilder, rightInput, newRightConditions.values()); } @@ -142,12 +140,9 @@ public void onMatch(RelOptRuleCall call) { final RelDataType returnType = cluster.getTypeFactory(). createSqlType(SqlTypeName.BOOLEAN); - final Map newConditions; - if (input instanceof HiveFilter) { - newConditions = splitCondition(((HiveFilter) input).getCondition()); - } - else { - newConditions = new HashMap(); + final Map newConditions = new HashMap(); + for (RexNode node : RelMetadataQuery.getPulledUpPredicates(input).pulledUpPredicates) { + newConditions.put(node.toString(), node); } for (int pos : inputKeyPositions) { try { @@ -175,20 +170,7 @@ public void onMatch(RelOptRuleCall call) { } return newConditions; } - - private static Map splitCondition(RexNode condition) { - Map newConditions = new HashMap(); - if (condition.getKind() == SqlKind.AND) { - for (RexNode node : ((RexCall) condition).getOperands()) { - newConditions.put(node.toString(), node); - } - } - else { - newConditions.put(condition.toString(), condition); - } - return newConditions; - } - + private static RelNode createHiveFilterConjunctiveCondition(FilterFactory filterFactory, RexBuilder rexBuilder, RelNode input, Collection conditions) { final RexNode newCondition = RexUtil.composeConjunction(rexBuilder, conditions, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index dccb598..ae5e56e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -508,6 +508,9 @@ public static boolean prunePartitionNames(List partColumnNames, boolean isUnknown = (isNeeded == null); if (!isUnknown && !isNeeded) { partIter.remove(); + if (LOG.isDebugEnabled()) { + LOG.debug("skipping partition: " + partName); + } continue; } if (isUnknown && values.contains(defaultPartitionName)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 7fd8c85..0d170ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -64,6 +64,7 @@ import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; +import org.apache.calcite.rel.rules.FilterMergeRule; import org.apache.calcite.rel.rules.FilterProjectTransposeRule; import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule; import org.apache.calcite.rel.rules.JoinToMultiJoinRule; @@ -946,10 +947,9 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); - // 2. Add not null filters - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { - basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); - } + // 2. Add "IS Not NULL" predicate below Join + // TODO: We should add it to #3 + basePlan = hepPlan(basePlan, false, mdProvider, HiveJoinAddNotNullRule.INSTANCE); // 3. Constant propagation, common filter extraction, and PPD basePlan = hepPlan(basePlan, true, mdProvider, @@ -965,19 +965,24 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv new FilterAggregateTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); - // 4. Transitive inference & Partition Pruning - basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule( - Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), + // 4. Filter Merge + basePlan = hepPlan(basePlan, false, mdProvider, new FilterMergeRule( + HiveFilter.DEFAULT_FILTER_FACTORY)); + + // 5. Transitive inference & Partition Pruning + basePlan = hepPlan(basePlan, false, mdProvider, + new JoinPushTransitivePredicatesRule(Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), new HivePartitionPruneRule(conf)); - // 5. Projection Pruning + // 6. Projection Pruning + //TODO: Move this as the first rule HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, HiveSemiJoin.HIVE_SEMIJOIN_FACTORY, HiveSort.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); - // 6. Rerun PPD through Project as column pruning would have introduced DT + // 7. Rerun PPD through Project as column pruning would have introduced DT // above scans basePlan = hepPlan(basePlan, true, mdProvider, new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index 79b4650..40a7cb5 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -610,7 +610,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) @@ -929,33 +929,39 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: (key + 1) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: (key + 1) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -963,7 +969,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1240,7 +1246,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) diff --git a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out index 1894110..b9426b8 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out @@ -11,39 +11,65 @@ explain select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3) cbo_t3 on cbo_t1.key=a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: cbo_t1 Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) TableScan alias: cbo_t2 Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 81 Data size: 14418 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: cbo_t3 Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE @@ -56,24 +82,33 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 81 Data size: 14418 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int) Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 + Left Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 400 Data size: 71200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 400 Data size: 71200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 324 Data size: 57672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 324 Data size: 57672 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 324 Data size: 57672 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -650,39 +685,66 @@ explain select key, c_int, cbo_t2.p, cbo_t2.q, cbo_t3.x, cbo_t4.b from cbo_t1 jo (select key as a, c_int as b, c_float as c from cbo_t1) cbo_t4 on cbo_t1.key=a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: cbo_t1 Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) TableScan alias: cbo_t2 Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 81 Data size: 14418 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: cbo_t3 Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE @@ -696,6 +758,43 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 81 Data size: 14418 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 324 Data size: 85212 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 324 Data size: 85212 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 324 Data size: 85212 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: string) + TableScan alias: cbo_t1 Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -711,23 +810,19 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - Left Outer Join0 to 3 + Left Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - 3 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6 - Statistics: Num rows: 2000 Data size: 534000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1620 Data size: 432540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2000 Data size: 534000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1620 Data size: 432540 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2000 Data size: 534000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1620 Data size: 432540 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat