diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java new file mode 100644 index 0000000..11d5b4e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.calcite.plan.RelOptPredicateList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.LinkedHashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + + +public class HivePreFilteringRule extends RelOptRule { + + protected static final Log LOG = LogFactory + .getLog(HivePreFilteringRule.class.getName()); + + + public static final HivePreFilteringRule INSTANCE = + new HivePreFilteringRule(); + + private final FilterFactory filterFactory; + + + private static final Set COMPARISON_UDFS = Sets.newHashSet( + GenericUDFOPEqual.class.getAnnotation(Description.class).name(), + GenericUDFOPEqualNS.class.getAnnotation(Description.class).name(), + GenericUDFOPEqualOrGreaterThan.class.getAnnotation(Description.class).name(), + GenericUDFOPEqualOrLessThan.class.getAnnotation(Description.class).name(), + GenericUDFOPGreaterThan.class.getAnnotation(Description.class).name(), + GenericUDFOPLessThan.class.getAnnotation(Description.class).name(), + GenericUDFOPNotEqual.class.getAnnotation(Description.class).name()); + private static final String IN_UDF = + GenericUDFIn.class.getAnnotation(Description.class).name(); + private static final String BETWEEN_UDF = + GenericUDFBetween.class.getAnnotation(Description.class).name(); + + + private HivePreFilteringRule() { + super(operand(Filter.class, + operand(RelNode.class, any()))); + this.filterFactory = HiveFilter.DEFAULT_FILTER_FACTORY; + } + + public void onMatch(RelOptRuleCall call) { + final Filter filter = call.rel(0); + final RelNode filterChild = call.rel(1); + + final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); + + final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); + + // 1. We extract possible candidates to be pushed down + List commonOperands = new ArrayList<>(); + switch (condition.getKind()) { + case AND: + ImmutableList operands = RexUtil.flattenAnd(((RexCall) condition).getOperands()); + for (RexNode operand: operands) { + if (operand.getKind() == SqlKind.OR) { + commonOperands.addAll(extractCommonOperands(rexBuilder,operand)); + } + } + break; + case OR: + commonOperands = extractCommonOperands(rexBuilder,condition); + break; + default: + return; + } + + // 2. If we did not generate anything for the new predicate, we bail out + if (commonOperands.isEmpty()) { + return; + } + + // 3. If the new conjuncts are already present in the plan, we bail out + final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter); + final List newConjuncts = new ArrayList<>(); + for (RexNode commonOperand : commonOperands) { + boolean found = false; + for (RexNode conjunct : predicates.pulledUpPredicates) { + if (commonOperand.toString().equals(conjunct.toString())) { + found = true; + break; + } + } + if (!found) { + newConjuncts.add(commonOperand); + } + } + if (newConjuncts.isEmpty()) { + return; + } + + // 4. Otherwise, we create a new condition + final RexNode newCondition = RexUtil.pullFactors(rexBuilder, + RexUtil.composeConjunction(rexBuilder, newConjuncts, false)); + + // 5. We create the new filter that might be pushed down + RelNode newFilter = filterFactory.createFilter(filterChild, newCondition); + RelNode newTopFilter = filterFactory.createFilter(newFilter, condition); + + call.transformTo(newTopFilter); + + } + + private static List extractCommonOperands(RexBuilder rexBuilder, RexNode condition) { + assert condition.getKind() == SqlKind.OR; + Multimap reductionCondition = LinkedHashMultimap.create(); + + // 1. We extract the information necessary to create the predicate for the new + // filter; currently we support comparison functions, in and between + ImmutableList operands = RexUtil.flattenOr(((RexCall) condition).getOperands()); + for (RexNode operand : operands) { + final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operand); + final List conjunctions = RelOptUtil.conjunctions(operandCNF); + for (RexNode conjunction: conjunctions) { + if (!(conjunction instanceof RexCall)) { + continue; + } + RexCall conjCall = (RexCall) conjunction; + if(COMPARISON_UDFS.contains(conjCall.getOperator().getName())) { + if (conjCall.operands.get(0) instanceof RexInputRef && + conjCall.operands.get(1) instanceof RexLiteral) { + reductionCondition.put(conjCall.operands.get(0).toString(), + conjCall); + } else if (conjCall.operands.get(1) instanceof RexInputRef && + conjCall.operands.get(0) instanceof RexLiteral) { + reductionCondition.put(conjCall.operands.get(1).toString(), + conjCall); + } + } else if(conjCall.getOperator().getName().equals(IN_UDF)) { + reductionCondition.put(conjCall.operands.get(0).toString(), + conjCall); + } else if(conjCall.getOperator().getName().equals(BETWEEN_UDF)) { + reductionCondition.put(conjCall.operands.get(1).toString(), + conjCall); + } + } + } + + // 2. We gather the common factors and return them + List commonOperands = new ArrayList<>(); + for (Entry> pair : reductionCondition.asMap().entrySet()) { + if (pair.getValue().size() == operands.size()) { + commonOperands.add(RexUtil.composeDisjunction(rexBuilder, pair.getValue(), false)); + } + } + return commonOperands; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 4760a22..c0d5d67 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -145,6 +145,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; @@ -932,32 +933,32 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); } - // 3. PPD + // 3. Constant propagation, common filter extraction, PPD, transitive inference, + // and Partition Pruning basePlan = hepPlan(basePlan, true, mdProvider, ReduceExpressionsRule.PROJECT_INSTANCE, ReduceExpressionsRule.FILTER_INSTANCE, ReduceExpressionsRule.JOIN_INSTANCE, - new HiveFilterProjectTransposeRule( - Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, - HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( - HiveFilter.DEFAULT_FILTER_FACTORY), - new FilterMergeRule(HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, - HiveFilterJoinRule.FILTER_ON_JOIN, new FilterAggregateTransposeRule(Filter.class, - HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); - - // 4. Transitive inference & Partition Pruning - basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule( - Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), + HivePreFilteringRule.INSTANCE, + new HiveFilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, + HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY), + new HiveFilterSetOpTransposeRule(HiveFilter.DEFAULT_FILTER_FACTORY), + HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, + new FilterAggregateTransposeRule(Filter.class, + HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class), + new FilterMergeRule(HiveFilter.DEFAULT_FILTER_FACTORY), + new JoinPushTransitivePredicatesRule(Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), new HivePartitionPruneRule(conf)); - // 5. Projection Pruning + // 4. Projection Pruning RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY, HiveSort.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); - // 6. Rerun PPD through Project as column pruning would have introduced DT + // 5. Rerun PPD through Project as column pruning would have introduced DT // above scans basePlan = hepPlan(basePlan, true, mdProvider, new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, diff --git ql/src/test/queries/clientpositive/filter_cond_pushdown.q ql/src/test/queries/clientpositive/filter_cond_pushdown.q new file mode 100644 index 0000000..5e23b71 --- /dev/null +++ ql/src/test/queries/clientpositive/filter_cond_pushdown.q @@ -0,0 +1,19 @@ +EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09'); + +EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09'); + +EXPLAIN +SELECT t1.key +FROM cbo_t1 t1 +JOIN ( + SELECT t2.key + FROM cbo_t2 t2 + JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int + WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR + ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key; diff --git ql/src/test/results/clientpositive/filter_cond_pushdown.q.out ql/src/test/results/clientpositive/filter_cond_pushdown.q.out new file mode 100644 index 0000000..e09057a --- /dev/null +++ ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -0,0 +1,382 @@ +PREHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value = '2008-04-08') or (value = '2008-04-09')) and key is not null) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value <> '') and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col1 = '2008-04-08') and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09')) (type: boolean) + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value) IN ('2008-04-08', '2008-04-10') or (value = '2008-04-09')) and key is not null) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value <> '') and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col1) IN ('2008-04-08', '2008-04-10') and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09')) (type: boolean) + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT t1.key +FROM cbo_t1 t1 +JOIN ( + SELECT t2.key + FROM cbo_t2 t2 + JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int + WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR + ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT t1.key +FROM cbo_t1 t1 +JOIN ( + SELECT t2.key + FROM cbo_t2 t2 + JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int + WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR + ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 1.0) (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_int (type: int), c_float (type: float) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1.0' (type: string) + sort order: + + Map-reduce partition columns: '1.0' (type: string) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: float) + TableScan + alias: t3 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((c_int = 1) and (UDFToDouble(key) = 1.0)) (type: boolean) + Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_float (type: float) + outputColumnNames: _col2 + Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1.0' (type: string) + sort order: + + Map-reduce partition columns: '1.0' (type: string) + Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: float) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col2 + _col5) > 2.0) or ((_col1 + 1) > 2)) (type: boolean) + Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 1.0) (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1.0' (type: string) + sort order: + + Map-reduce partition columns: '1.0' (type: string) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: '1.0' (type: string) + sort order: + + Map-reduce partition columns: '1.0' (type: string) + Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '1.0' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index 6d4e165..3c0ed08 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -473,7 +473,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238'))) (type: boolean) + predicate: ((((key = '484') or (key = '238')) and ((value = 'val_484') or (value = 'val_238'))) and (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238')))) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/pcr.q.out ql/src/test/results/clientpositive/pcr.q.out index d7c40a3..791575f 100644 --- ql/src/test/results/clientpositive/pcr.q.out +++ ql/src/test/results/clientpositive/pcr.q.out @@ -2475,7 +2475,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) + predicate: (((key = 1) or (key = 2)) and (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2)))) (type: boolean) Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) diff --git ql/src/test/results/clientpositive/tez/vectorization_7.q.out ql/src/test/results/clientpositive/tez/vectorization_7.q.out index 34f590d..dd82bc7 100644 --- ql/src/test/results/clientpositive/tez/vectorization_7.q.out +++ ql/src/test/results/clientpositive/tez/vectorization_7.q.out @@ -65,12 +65,12 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + predicate: (((((988888.0 < cdouble) or (3569.0 >= cdouble)) and (ctinyint <> 0)) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE @@ -237,12 +237,12 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + predicate: (((((988888.0 < cdouble) or (3569.0 >= cdouble)) and (ctinyint <> 0)) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vectorization_7.q.out ql/src/test/results/clientpositive/vectorization_7.q.out index c7c096e..4d27a87 100644 --- ql/src/test/results/clientpositive/vectorization_7.q.out +++ ql/src/test/results/clientpositive/vectorization_7.q.out @@ -62,12 +62,12 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + predicate: (((((988888.0 < cdouble) or (3569.0 >= cdouble)) and (ctinyint <> 0)) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE @@ -231,12 +231,12 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + predicate: (((((988888.0 < cdouble) or (3569.0 >= cdouble)) and (ctinyint <> 0)) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE