diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java new file mode 100644 index 0000000..11d5b4e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.calcite.plan.RelOptPredicateList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.LinkedHashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + + +public class HivePreFilteringRule extends RelOptRule { + + protected static final Log LOG = LogFactory + .getLog(HivePreFilteringRule.class.getName()); + + + public static final HivePreFilteringRule INSTANCE = + new HivePreFilteringRule(); + + private final FilterFactory filterFactory; + + + private static final Set COMPARISON_UDFS = Sets.newHashSet( + GenericUDFOPEqual.class.getAnnotation(Description.class).name(), + GenericUDFOPEqualNS.class.getAnnotation(Description.class).name(), + GenericUDFOPEqualOrGreaterThan.class.getAnnotation(Description.class).name(), + GenericUDFOPEqualOrLessThan.class.getAnnotation(Description.class).name(), + GenericUDFOPGreaterThan.class.getAnnotation(Description.class).name(), + GenericUDFOPLessThan.class.getAnnotation(Description.class).name(), + GenericUDFOPNotEqual.class.getAnnotation(Description.class).name()); + private static final String IN_UDF = + GenericUDFIn.class.getAnnotation(Description.class).name(); + private static final String BETWEEN_UDF = + GenericUDFBetween.class.getAnnotation(Description.class).name(); + + + private HivePreFilteringRule() { + super(operand(Filter.class, + operand(RelNode.class, any()))); + this.filterFactory = HiveFilter.DEFAULT_FILTER_FACTORY; + } + + public void onMatch(RelOptRuleCall call) { + final Filter filter = call.rel(0); + final RelNode filterChild = call.rel(1); + + final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); + + final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); + + // 1. We extract possible candidates to be pushed down + List commonOperands = new ArrayList<>(); + switch (condition.getKind()) { + case AND: + ImmutableList operands = RexUtil.flattenAnd(((RexCall) condition).getOperands()); + for (RexNode operand: operands) { + if (operand.getKind() == SqlKind.OR) { + commonOperands.addAll(extractCommonOperands(rexBuilder,operand)); + } + } + break; + case OR: + commonOperands = extractCommonOperands(rexBuilder,condition); + break; + default: + return; + } + + // 2. If we did not generate anything for the new predicate, we bail out + if (commonOperands.isEmpty()) { + return; + } + + // 3. If the new conjuncts are already present in the plan, we bail out + final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter); + final List newConjuncts = new ArrayList<>(); + for (RexNode commonOperand : commonOperands) { + boolean found = false; + for (RexNode conjunct : predicates.pulledUpPredicates) { + if (commonOperand.toString().equals(conjunct.toString())) { + found = true; + break; + } + } + if (!found) { + newConjuncts.add(commonOperand); + } + } + if (newConjuncts.isEmpty()) { + return; + } + + // 4. Otherwise, we create a new condition + final RexNode newCondition = RexUtil.pullFactors(rexBuilder, + RexUtil.composeConjunction(rexBuilder, newConjuncts, false)); + + // 5. We create the new filter that might be pushed down + RelNode newFilter = filterFactory.createFilter(filterChild, newCondition); + RelNode newTopFilter = filterFactory.createFilter(newFilter, condition); + + call.transformTo(newTopFilter); + + } + + private static List extractCommonOperands(RexBuilder rexBuilder, RexNode condition) { + assert condition.getKind() == SqlKind.OR; + Multimap reductionCondition = LinkedHashMultimap.create(); + + // 1. We extract the information necessary to create the predicate for the new + // filter; currently we support comparison functions, in and between + ImmutableList operands = RexUtil.flattenOr(((RexCall) condition).getOperands()); + for (RexNode operand : operands) { + final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operand); + final List conjunctions = RelOptUtil.conjunctions(operandCNF); + for (RexNode conjunction: conjunctions) { + if (!(conjunction instanceof RexCall)) { + continue; + } + RexCall conjCall = (RexCall) conjunction; + if(COMPARISON_UDFS.contains(conjCall.getOperator().getName())) { + if (conjCall.operands.get(0) instanceof RexInputRef && + conjCall.operands.get(1) instanceof RexLiteral) { + reductionCondition.put(conjCall.operands.get(0).toString(), + conjCall); + } else if (conjCall.operands.get(1) instanceof RexInputRef && + conjCall.operands.get(0) instanceof RexLiteral) { + reductionCondition.put(conjCall.operands.get(1).toString(), + conjCall); + } + } else if(conjCall.getOperator().getName().equals(IN_UDF)) { + reductionCondition.put(conjCall.operands.get(0).toString(), + conjCall); + } else if(conjCall.getOperator().getName().equals(BETWEEN_UDF)) { + reductionCondition.put(conjCall.operands.get(1).toString(), + conjCall); + } + } + } + + // 2. We gather the common factors and return them + List commonOperands = new ArrayList<>(); + for (Entry> pair : reductionCondition.asMap().entrySet()) { + if (pair.getValue().size() == operands.size()) { + commonOperands.add(RexUtil.composeDisjunction(rexBuilder, pair.getValue(), false)); + } + } + return commonOperands; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 4760a22..8c762e5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -145,6 +145,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; @@ -932,17 +933,18 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); } - // 3. PPD + // 3. Constant propagation, common filter extraction, and PPD basePlan = hepPlan(basePlan, true, mdProvider, ReduceExpressionsRule.PROJECT_INSTANCE, ReduceExpressionsRule.FILTER_INSTANCE, ReduceExpressionsRule.JOIN_INSTANCE, - new HiveFilterProjectTransposeRule( - Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, - HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( - HiveFilter.DEFAULT_FILTER_FACTORY), - new FilterMergeRule(HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, - HiveFilterJoinRule.FILTER_ON_JOIN, new FilterAggregateTransposeRule(Filter.class, + HivePreFilteringRule.INSTANCE, + new HiveFilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, + HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY), + new HiveFilterSetOpTransposeRule(HiveFilter.DEFAULT_FILTER_FACTORY), + HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, + new FilterAggregateTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); // 4. Transitive inference & Partition Pruning diff --git ql/src/test/queries/clientpositive/filter_cond_pushdown.q ql/src/test/queries/clientpositive/filter_cond_pushdown.q new file mode 100644 index 0000000..5e23b71 --- /dev/null +++ ql/src/test/queries/clientpositive/filter_cond_pushdown.q @@ -0,0 +1,19 @@ +EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09'); + +EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09'); + +EXPLAIN +SELECT t1.key +FROM cbo_t1 t1 +JOIN ( + SELECT t2.key + FROM cbo_t2 t2 + JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int + WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR + ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key; diff --git ql/src/test/results/clientpositive/auto_join16.q.out ql/src/test/results/clientpositive/auto_join16.q.out index b07eaf6..1bad0f9 100644 --- ql/src/test/results/clientpositive/auto_join16.q.out +++ ql/src/test/results/clientpositive/auto_join16.q.out @@ -23,11 +23,11 @@ STAGE PLANS: Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:a + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:a + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/auto_join8.q.out ql/src/test/results/clientpositive/auto_join8.q.out index 485622a..5b02597 100644 --- ql/src/test/results/clientpositive/auto_join8.q.out +++ ql/src/test/results/clientpositive/auto_join8.q.out @@ -73,12 +73,12 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: (((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -86,17 +86,17 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col2 is null (type: boolean) - Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/filter_cond_pushdown.q.out ql/src/test/results/clientpositive/filter_cond_pushdown.q.out new file mode 100644 index 0000000..e09057a --- /dev/null +++ ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -0,0 +1,382 @@ +PREHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value = '2008-04-08') or (value = '2008-04-09')) and key is not null) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value <> '') and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col1 = '2008-04-08') and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09')) (type: boolean) + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value) IN ('2008-04-08', '2008-04-10') or (value = '2008-04-09')) and key is not null) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value <> '') and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col1) IN ('2008-04-08', '2008-04-10') and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09')) (type: boolean) + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 205 Data size: 2177 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT t1.key +FROM cbo_t1 t1 +JOIN ( + SELECT t2.key + FROM cbo_t2 t2 + JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int + WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR + ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT t1.key +FROM cbo_t1 t1 +JOIN ( + SELECT t2.key + FROM cbo_t2 t2 + JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int + WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR + ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 1.0) (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_int (type: int), c_float (type: float) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1.0' (type: string) + sort order: + + Map-reduce partition columns: '1.0' (type: string) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: float) + TableScan + alias: t3 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((c_int = 1) and (UDFToDouble(key) = 1.0)) (type: boolean) + Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_float (type: float) + outputColumnNames: _col2 + Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1.0' (type: string) + sort order: + + Map-reduce partition columns: '1.0' (type: string) + Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: float) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col2 + _col5) > 2.0) or ((_col1 + 1) > 2)) (type: boolean) + Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 1.0) (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1.0' (type: string) + sort order: + + Map-reduce partition columns: '1.0' (type: string) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: '1.0' (type: string) + sort order: + + Map-reduce partition columns: '1.0' (type: string) + Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '1.0' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index 6d4e165..3c0ed08 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -473,7 +473,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238'))) (type: boolean) + predicate: ((((key = '484') or (key = '238')) and ((value = 'val_484') or (value = 'val_238'))) and (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238')))) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/pcr.q.out ql/src/test/results/clientpositive/pcr.q.out index d7c40a3..791575f 100644 --- ql/src/test/results/clientpositive/pcr.q.out +++ ql/src/test/results/clientpositive/pcr.q.out @@ -2475,7 +2475,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) + predicate: (((key = 1) or (key = 2)) and (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2)))) (type: boolean) Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) diff --git ql/src/test/results/clientpositive/ppd_gby_join.q.out ql/src/test/results/clientpositive/ppd_gby_join.q.out index 60ae8f9..579c827 100644 --- ql/src/test/results/clientpositive/ppd_gby_join.q.out +++ ql/src/test/results/clientpositive/ppd_gby_join.q.out @@ -42,16 +42,19 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) + predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -59,20 +62,27 @@ STAGE PLANS: predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '2') and (_col0 <> '4')) (type: boolean) + predicate: ((_col0 > '20') and (((_col1 < 'val_50') or (_col0 > '2')) and (_col0 < '400'))) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: ((_col0 > '2') and (_col0 <> '4')) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/ppd_join.q.out ql/src/test/results/clientpositive/ppd_join.q.out index 8fde2d7..ae5fb27 100644 --- ql/src/test/results/clientpositive/ppd_join.q.out +++ ql/src/test/results/clientpositive/ppd_join.q.out @@ -39,17 +39,20 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) + predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -57,20 +60,27 @@ STAGE PLANS: predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > '2') and (_col0 <> '4')) (type: boolean) + predicate: ((_col0 > '20') and (((_col1 < 'val_50') or (_col0 > '2')) and (_col0 < '400'))) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: ((_col0 > '2') and (_col0 <> '4')) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/ppd_join2.q.out ql/src/test/results/clientpositive/ppd_join2.q.out index b821d91..88624ea 100644 --- ql/src/test/results/clientpositive/ppd_join2.q.out +++ ql/src/test/results/clientpositive/ppd_join2.q.out @@ -46,17 +46,20 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '302') and ((_col0 <> '311') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 <> '14') (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + predicate: ((_col0 <> '302') and ((_col0 <> '311') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -68,17 +71,20 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '305') and (_col0 <> '14')) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 <> '311') and (((_col1 <> 'val_50') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + predicate: ((_col0 <> '305') and (_col0 <> '14')) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -87,21 +93,21 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col2 <> '10') or (_col0 <> '10')) (type: boolean) - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -119,23 +125,30 @@ STAGE PLANS: predicate: (((key <> '306') and (sqrt(key) <> 13.0)) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: (sqrt(_col0) <> 13.0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/ppd_join3.q.out ql/src/test/results/clientpositive/ppd_join3.q.out index 49da9e0..6c5c0da 100644 --- ql/src/test/results/clientpositive/ppd_join3.q.out +++ ql/src/test/results/clientpositive/ppd_join3.q.out @@ -46,17 +46,20 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '11') and ((_col0 > '0') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + predicate: ((_col0 <> '11') and ((_col0 > '0') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -64,20 +67,27 @@ STAGE PLANS: predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '12') and (_col0 <> '4')) (type: boolean) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 19 Data size: 201 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 19 Data size: 201 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 > '0') and (((_col1 <> 'val_500') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 <> '12') and (_col0 <> '4')) (type: boolean) + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -86,24 +96,24 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 221 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 221 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) - Statistics: Num rows: 20 Data size: 221 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 20 Data size: 221 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '13') and (_col0 <> '1')) (type: boolean) - Statistics: Num rows: 20 Data size: 221 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 10 Data size: 110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -125,22 +135,25 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '11') and ((_col0 > '0') and ((_col0 < '400') and ((_col0 <> '12') and (_col0 <> '4'))))) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 <> '1') (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + predicate: ((_col0 <> '11') and ((_col0 > '0') and ((_col0 < '400') and ((_col0 <> '12') and (_col0 <> '4'))))) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Reduce Operator Tree: Join Operator @@ -150,14 +163,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col3 - Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/ppd_join2.q.out ql/src/test/results/clientpositive/spark/ppd_join2.q.out index dd4d129..cf81423 100644 --- ql/src/test/results/clientpositive/spark/ppd_join2.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -47,17 +47,24 @@ STAGE PLANS: predicate: (((key <> '306') and (sqrt(key) <> 13.0)) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: (sqrt(_col0) <> 13.0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan @@ -71,17 +78,20 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '302') and ((_col0 <> '311') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 <> '14') (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + predicate: ((_col0 <> '302') and ((_col0 <> '311') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -95,17 +105,20 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '305') and (_col0 <> '14')) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 <> '311') and (((_col1 <> 'val_50') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + predicate: ((_col0 <> '305') and (_col0 <> '14')) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -136,26 +149,26 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col2 <> '10') or (_col0 <> '10')) (type: boolean) - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col3 (type: string) Stage: Stage-0 diff --git ql/src/test/results/clientpositive/spark/ppd_join3.q.out ql/src/test/results/clientpositive/spark/ppd_join3.q.out index c93994c..d2343c4 100644 --- ql/src/test/results/clientpositive/spark/ppd_join3.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join3.q.out @@ -51,16 +51,19 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '11') and ((_col0 > '0') and ((_col0 < '400') and ((_col0 <> '12') and (_col0 <> '4'))))) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 <> '1') (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + predicate: ((_col0 <> '11') and ((_col0 > '0') and ((_col0 < '400') and ((_col0 <> '12') and (_col0 <> '4'))))) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan @@ -74,17 +77,20 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '11') and ((_col0 > '0') and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + predicate: ((_col0 <> '11') and ((_col0 > '0') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -94,20 +100,27 @@ STAGE PLANS: predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 <> '12') and (_col0 <> '4')) (type: boolean) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 19 Data size: 201 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 19 Data size: 201 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 > '0') and (((_col1 <> 'val_500') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 <> '12') and (_col0 <> '4')) (type: boolean) + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -117,14 +130,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col3 - Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -138,29 +151,29 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 221 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 221 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) - Statistics: Num rows: 20 Data size: 221 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 20 Data size: 221 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '13') and (_col0 <> '1')) (type: boolean) - Statistics: Num rows: 20 Data size: 221 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 10 Data size: 110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Stage: Stage-0 diff --git ql/src/test/results/clientpositive/subquery_notin.q.out ql/src/test/results/clientpositive/subquery_notin.q.out index 0b6f33f..fd6d53b 100644 --- ql/src/test/results/clientpositive/subquery_notin.q.out +++ ql/src/test/results/clientpositive/subquery_notin.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- non agg, non corr explain select * @@ -151,7 +151,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from src where src.key not in ( select key from src s1 where s1.key > '2') @@ -285,7 +285,7 @@ POSTHOOK: Input: default@src 199 val_199 199 val_199 2 val_2 -Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- non agg, corr explain select p_mfgr, b.p_name, p_size @@ -528,7 +528,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select p_mfgr, b.p_name, p_size from part b where b.p_name not in @@ -1243,7 +1243,7 @@ Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#5 almond azure blanched chiffon midnight 23 Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 -Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- non agg, non corr, Group By in Parent Query select li.l_partkey, count(*) from lineitem li @@ -1278,7 +1278,7 @@ POSTHOOK: Input: default@lineitem 139636 1 175839 1 182052 1 -Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- alternate not in syntax select * from src @@ -1442,7 +1442,7 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_v POSTHOOK: Output: database:default POSTHOOK: Output: default@T2_v -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) @@ -1587,7 +1587,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/subquery_views.q.out ql/src/test/results/clientpositive/subquery_views.q.out index bdab9ea..41834a3 100644 --- ql/src/test/results/clientpositive/subquery_views.q.out +++ ql/src/test/results/clientpositive/subquery_views.q.out @@ -69,8 +69,8 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@cv2 -Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[46][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product PREHOOK: query: explain select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') @@ -378,8 +378,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[46][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product PREHOOK: query: select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/vectorization_7.q.out ql/src/test/results/clientpositive/tez/vectorization_7.q.out index 34f590d..dd82bc7 100644 --- ql/src/test/results/clientpositive/tez/vectorization_7.q.out +++ ql/src/test/results/clientpositive/tez/vectorization_7.q.out @@ -65,12 +65,12 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + predicate: (((((988888.0 < cdouble) or (3569.0 >= cdouble)) and (ctinyint <> 0)) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE @@ -237,12 +237,12 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + predicate: (((((988888.0 < cdouble) or (3569.0 >= cdouble)) and (ctinyint <> 0)) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vectorization_7.q.out ql/src/test/results/clientpositive/vectorization_7.q.out index c7c096e..4d27a87 100644 --- ql/src/test/results/clientpositive/vectorization_7.q.out +++ ql/src/test/results/clientpositive/vectorization_7.q.out @@ -62,12 +62,12 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + predicate: (((((988888.0 < cdouble) or (3569.0 >= cdouble)) and (ctinyint <> 0)) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE @@ -231,12 +231,12 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + predicate: (((((988888.0 < cdouble) or (3569.0 >= cdouble)) and (ctinyint <> 0)) and ((UDFToDouble(ctimestamp1) <= 0.0) or ((UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')))) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4853 Data size: 1043412 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE