diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java index b1eca7d..1b327fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -30,6 +31,7 @@ import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexShuttle; @@ -44,8 +46,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Multimap; public class HiveRexUtil { @@ -462,15 +466,33 @@ public static RexNode simplifyAnd2ForUnknownAsFalse(RexBuilder rexBuilder, return simplify(rexBuilder, terms.get(0), true); } // Try to simplify the expression + final Multimap> equalityTerms = ArrayListMultimap.create(); + final Map equalityConstantTerms = new HashMap<>(); final Set negatedTerms = new HashSet<>(); final Set nullOperands = new HashSet<>(); final Set notNullOperands = new LinkedHashSet<>(); final Set comparedOperands = new HashSet<>(); for (int i = 0; i < terms.size(); i++) { - final RexNode term = terms.get(i); + RexNode term = terms.get(i); if (!HiveCalciteUtil.isDeterministic(term)) { continue; } + // Simplify BOOLEAN expressions if possible + while (term.getKind() == SqlKind.EQUALS) { + RexCall call = (RexCall) term; + if (call.getOperands().get(0).isAlwaysTrue()) { + term = 
call.getOperands().get(1); + terms.remove(i); + terms.add(i, term); + continue; + } else if (call.getOperands().get(1).isAlwaysTrue()) { + term = call.getOperands().get(0); + terms.remove(i); + terms.add(i, term); + continue; + } + break; + } switch (term.getKind()) { case EQUALS: case NOT_EQUALS: @@ -481,18 +503,44 @@ public static RexNode simplifyAnd2ForUnknownAsFalse(RexBuilder rexBuilder, RexCall call = (RexCall) term; RexNode left = call.getOperands().get(0); comparedOperands.add(left.toString()); + RexCall leftCast = null; // if it is a cast, we include the inner reference if (left.getKind() == SqlKind.CAST) { - RexCall leftCast = (RexCall) left; + leftCast = (RexCall) left; comparedOperands.add(leftCast.getOperands().get(0).toString()); } RexNode right = call.getOperands().get(1); comparedOperands.add(right.toString()); + RexCall rightCast = null; // if it is a cast, we include the inner reference if (right.getKind() == SqlKind.CAST) { - RexCall rightCast = (RexCall) right; + rightCast = (RexCall) right; comparedOperands.add(rightCast.getOperands().get(0).toString()); } + // Check for equality on different constants. 
If the same ref or CAST(ref) + // is equal to different constants, this condition cannot be satisfied, + // and hence it can be evaluated to FALSE + if (term.getKind() == SqlKind.EQUALS) { + boolean leftRef = left instanceof RexInputRef || + (leftCast != null && leftCast.getOperands().get(0) instanceof RexInputRef); + boolean rightRef = right instanceof RexInputRef || + (rightCast != null && rightCast.getOperands().get(0) instanceof RexInputRef); + if (right instanceof RexLiteral && leftRef) { + final String literal = right.toString(); + final String prevLiteral = equalityConstantTerms.put(left.toString(), literal); + if (prevLiteral != null && !literal.equals(prevLiteral)) { + return rexBuilder.makeLiteral(false); + } + } else if (left instanceof RexLiteral && rightRef) { + final String literal = left.toString(); + final String prevLiteral = equalityConstantTerms.put(right.toString(), literal); + if (prevLiteral != null && !literal.equals(prevLiteral)) { + return rexBuilder.makeLiteral(false); + } + } else if (leftRef && rightRef) { + equalityTerms.put(left.toString(), Pair.of(right.toString(), term)); + } + } // Assume the expression a > 5 is part of a Filter condition. // Then we can derive the negated term: a <= 5. // But as the comparison is string based and thus operands order dependent, @@ -528,6 +576,30 @@ public static RexNode simplifyAnd2ForUnknownAsFalse(RexBuilder rexBuilder, if (!Collections.disjoint(nullOperands, comparedOperands)) { return rexBuilder.makeLiteral(false); } + // Check for equality of two refs wrt equality with constants + // Example #1. x=5 AND y=5 AND x=y : x=5 AND y=5 + // Example #2. 
x=5 AND y=6 AND x=y - not satisfiable + for (String ref1 : equalityTerms.keySet()) { + final String literal1 = equalityConstantTerms.get(ref1); + if (literal1 == null) { + continue; + } + Collection> references = equalityTerms.get(ref1); + for (Pair ref2 : references) { + final String literal2 = equalityConstantTerms.get(ref2.left); + if (literal2 == null) { + continue; + } + if (!literal1.equals(literal2)) { + // If the two sides of ref1 = ref2 are bound to different constants, + // the conjunction is not satisfiable + return rexBuilder.makeLiteral(false); + } + // Otherwise the ref1 = ref2 term is redundant and can be removed, as + // both sides are already known to be equal to the same constant + terms.remove(ref2.right); + } + } // Remove not necessary IS NOT NULL expressions. // // Example. IS NOT NULL(x) AND x < 5 : x < 5 diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java index 514ae62..2fc68ae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java @@ -31,7 +31,6 @@ import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.JoinInfo; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.rules.ValuesReduceRule; @@ -219,13 +218,7 @@ public JoinReduceExpressionsRule(Class joinClass, mq.getPulledUpPredicates(join.getRight()); final RelOptPredicateList predicates = leftPredicates.union(rightPredicates.shift(fieldCount)); - if (!reduceExpressions(join, expList, predicates)) { - return; - } - final JoinInfo joinInfo = JoinInfo.of(join.getLeft(), join.getRight(), expList.get(0)); - if (!joinInfo.isEqui()) { - // This kind of join must be an equi-join, and 
the condition is - // no longer an equi-join. SemiJoin is an example of this. + if (!reduceExpressions(join, expList, predicates, true)) { return; } call.transformTo( diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out index 99183fc..a606e30 100644 --- ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -756,10 +756,10 @@ STAGE PLANS: alias: loc_orc Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((year = 2001) and (state = 'OH') and (state = 'FL')) (type: boolean) + predicate: false (type: boolean) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'FL' (type: string), locid (type: int), zip (type: bigint), 2001 (type: int) + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git ql/src/test/results/clientpositive/cbo_rp_join1.q.out ql/src/test/results/clientpositive/cbo_rp_join1.q.out index 4d785bf..9cb9594 100644 --- ql/src/test/results/clientpositive/cbo_rp_join1.q.out +++ ql/src/test/results/clientpositive/cbo_rp_join1.q.out @@ -119,7 +119,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### NULL -Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key = 40 AND a.value = 40 AND a.key = a.value AND b.key = 40 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key = 40 AND a.value = 40 AND a.key = a.value AND b.key = 40 @@ -141,7 +141,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 = _col1) and (_col1 = 40) and (_col0 = 40)) (type: boolean) + predicate: ((_col1 = 40) and (_col0 = 40)) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: @@ -224,6 +224,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### NULL +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key = 40 AND a.key = b.key AND b.key = 40 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key = 40 AND a.key = b.key AND b.key = 40 @@ -248,11 +249,9 @@ STAGE PLANS: predicate: (_col0 = 40) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int) TableScan alias: a Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE @@ -264,24 +263,22 @@ STAGE PLANS: predicate: (_col0 = 40) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: 
Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Outer Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 23 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hash(_col0,_col1,_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) mode: hash @@ -322,6 +319,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key = 40 AND a.key = b.key AND b.key = 40 PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/constprog_semijoin.q.out ql/src/test/results/clientpositive/constprog_semijoin.q.out index 1940987..040cfb4 100644 --- ql/src/test/results/clientpositive/constprog_semijoin.q.out +++ ql/src/test/results/clientpositive/constprog_semijoin.q.out @@ -421,26 +421,26 @@ STAGE PLANS: alias: table1 Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((dimid = 100) = true) and (dimid <> 100)) (type: boolean) - Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: int), val (type: string), val1 (type: string), dimid (type: 
int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE + expressions: id (type: int), val (type: string), val1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int), true (type: boolean) + key expressions: 100 (type: int), true (type: boolean) sort order: ++ - Map-reduce partition columns: _col3 (type: int), true (type: boolean) - Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: 100 (type: int), true (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) TableScan alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((id = 100) = true) and (id <> 100)) (type: boolean) + predicate: ((id = 100) and (id = 100) is not null) (type: boolean) Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: int), true (type: boolean) + expressions: 100 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -458,13 +458,13 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col3 (type: int), true (type: boolean) + 0 100 (type: int), true (type: boolean) 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -502,50 +502,50 @@ STAGE PLANS: alias: table1 Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((dimid) IN (100, 200) and ((dimid = 100) = true)) (type: boolean) - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + predicate: ((dimid = 100) and (dimid = 100) is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: int), val (type: string), val1 (type: string), dimid (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + expressions: id (type: int), val (type: string), val1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int), true (type: boolean) + key expressions: 100 (type: int), true (type: boolean) sort order: ++ - Map-reduce partition columns: _col3 (type: int), true (type: boolean) - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: 100 (type: int), true (type: boolean) + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) TableScan alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((id) IN (100, 200) and ((id = 100) = true)) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: ((id = 100) and (id = 100) is not null) (type: boolean) + Statistics: Num 
rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: int), true (type: boolean) + expressions: 100 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col1 (type: boolean) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: boolean) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 _col3 (type: int), true (type: boolean) + 0 100 (type: int), true (type: boolean) 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 110 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 110 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -585,50 +585,50 @@ STAGE PLANS: alias: table1 Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((dimid = 100) = true) and (dimid = 200)) (type: boolean) - Statistics: Num rows: 2 Data size: 40 Basic stats: 
COMPLETE Column stats: NONE + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), val (type: string), val1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 200 (type: int), true (type: boolean) + key expressions: 100 (type: int), true (type: boolean) sort order: ++ - Map-reduce partition columns: 200 (type: int), true (type: boolean) - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: 100 (type: int), true (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) TableScan alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((id = 100) = true) and (id = 200)) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: ((id = 100) and (id = 100) is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 200 (type: int), true (type: boolean) + expressions: 100 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col1 (type: boolean) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key 
expressions: _col0 (type: int), _col1 (type: boolean) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 200 (type: int), true (type: boolean) + 0 100 (type: int), true (type: boolean) 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -666,38 +666,38 @@ STAGE PLANS: alias: table1 Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((dimid = 100) = true) and (dimid = 100)) (type: boolean) - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + predicate: ((dimid = 100) and (dimid = 100) is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), val (type: string), val1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 100 (type: int), true (type: boolean) sort order: ++ Map-reduce partition columns: 100 (type: int), true (type: boolean) - Statistics: Num rows: 2 Data size: 40 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) TableScan alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((id = 100) = true) and (id = 100)) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: ((id = 100) and (id = 100) is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 100 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col1 (type: boolean) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: boolean) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -706,10 +706,10 @@ STAGE PLANS: 0 100 (type: int), true (type: boolean) 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 110 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 110 
Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -749,26 +749,26 @@ STAGE PLANS: alias: table1 Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((dimid = 100) = true) and dimid is not null) (type: boolean) + predicate: ((dimid = 100) and (dimid = 100) is not null) (type: boolean) Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: int), val (type: string), val1 (type: string), dimid (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: id (type: int), val (type: string), val1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int), true (type: boolean) + key expressions: 100 (type: int), true (type: boolean) sort order: ++ - Map-reduce partition columns: _col3 (type: int), true (type: boolean) + Map-reduce partition columns: 100 (type: int), true (type: boolean) Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) TableScan alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((id = 100) = true) and id is not null) (type: boolean) + predicate: ((id = 100) and (id = 100) is not null) (type: boolean) Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: int), true (type: boolean) + expressions: 100 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -786,7 +786,7 @@ STAGE PLANS: condition map: 
Left Semi Join 0 to 1 keys: - 0 _col3 (type: int), true (type: boolean) + 0 100 (type: int), true (type: boolean) 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 110 Basic stats: COMPLETE Column stats: NONE