diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 4825a61..7377efc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -66,8 +66,6 @@ import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.google.common.base.Function; import com.google.common.collect.ImmutableList; @@ -83,8 +81,6 @@ public class HiveCalciteUtil { - private static final Logger LOG = LoggerFactory.getLogger(HiveCalciteUtil.class); - /** * Get list of virtual columns from the given list of projections. @@ -630,8 +626,13 @@ public String apply(RexNode r) { public static ImmutableList getPredsNotPushedAlready(RelNode inp, List predsToPushDown) { final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(inp); + List preds = Lists.newArrayList(); + for (RexNode pred : predicates.pulledUpPredicates) { + preds.add(pred); + preds.addAll(RelOptUtil.conjunctions(pred)); + } final ImmutableSet alreadyPushedPreds = ImmutableSet.copyOf(Lists.transform( - predicates.pulledUpPredicates, REX_STR_FN)); + preds, REX_STR_FN)); final ImmutableList.Builder newConjuncts = ImmutableList.builder(); for (RexNode r : predsToPushDown) { if (!alreadyPushedPreds.contains(r.toString())) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index c0609d7..d051b10 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDeterministicSelectivityCost; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdParallelism; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdPredicates; @@ -80,4 +81,12 @@ public RelMetadataProvider getMetadataProvider() { new DefaultRelMetadataProvider())); } + public RelMetadataProvider getDummyMetadataProvider() { + return ChainedRelMetadataProvider.of(ImmutableList + .of( + HiveRelMdDeterministicSelectivityCost.SOURCE, + HiveRelMdPredicates.SOURCE, + new DefaultRelMetadataProvider())); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java index 3c5cac2..4baaa9f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java @@ -64,6 +64,13 @@ public String toString() { } }; + public static final HiveCost LARGE = new HiveCost(1000.0, 1000.0, 0.0) { + @Override + public String toString() { + return "{large}"; + } + }; + public static final RelOptCostFactory FACTORY = new Factory(); // ~ Instance 
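A quick standalone illustration of the getPredsNotPushedAlready change above: pulled-up predicates can arrive as a single AND node, so comparing only whole-predicate string forms would miss individual conjuncts that were already pushed; the patch therefore adds each pulled-up predicate and its conjunctions to the set. A minimal sketch, assuming only Calcite's public RexBuilder/RelOptUtil APIs (the class name ConjunctionsDemo is hypothetical, not part of the patch):

import java.math.BigDecimal;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;

public class ConjunctionsDemo {
  public static void main(String[] args) {
    SqlTypeFactoryImpl typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
    RexBuilder rexBuilder = new RexBuilder(typeFactory);
    RelDataType intType = typeFactory.createSqlType(SqlTypeName.INTEGER);
    // Build AND($0 > 1, $0 < 10), the shape a pulled-up predicate may have.
    RexNode ref = rexBuilder.makeInputRef(intType, 0);
    RexNode gt = rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN,
        ref, rexBuilder.makeExactLiteral(BigDecimal.ONE));
    RexNode lt = rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN,
        ref, rexBuilder.makeExactLiteral(BigDecimal.TEN));
    RexNode and = rexBuilder.makeCall(SqlStdOperatorTable.AND, gt, lt);
    // Prints the two conjuncts separately, so each conjunct's string form
    // can be matched against predsToPushDown on its own.
    System.out.println(RelOptUtil.conjunctions(and));
  }
}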
fields -------------------------------------------------------- diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java index d15d885..4af1f8d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java @@ -56,8 +56,8 @@ public RelOptCost getJoinCost(HiveJoin join) { JoinAlgorithm joinAlgorithm = null; RelOptCost minJoinCost = null; - if (LOG.isDebugEnabled()) { - LOG.debug("Join algorithm selection for:\n" + RelOptUtil.toString(join)); + if (LOG.isTraceEnabled()) { + LOG.trace("Join algorithm selection for:\n" + RelOptUtil.toString(join)); } for (JoinAlgorithm possibleAlgorithm : this.joinAlgorithms) { @@ -65,8 +65,8 @@ public RelOptCost getJoinCost(HiveJoin join) { continue; } RelOptCost joinCost = possibleAlgorithm.getCost(join); - if (LOG.isDebugEnabled()) { - LOG.debug(possibleAlgorithm + " cost: " + joinCost); + if (LOG.isTraceEnabled()) { + LOG.trace(possibleAlgorithm + " cost: " + joinCost); } if (minJoinCost == null || joinCost.isLt(minJoinCost) ) { joinAlgorithm = possibleAlgorithm; @@ -74,8 +74,8 @@ public RelOptCost getJoinCost(HiveJoin join) { } } - if (LOG.isDebugEnabled()) { - LOG.debug(joinAlgorithm + " selected"); + if (LOG.isTraceEnabled()) { + LOG.trace(joinAlgorithm + " selected"); } join.setJoinAlgorithm(joinAlgorithm); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java index 8b57b35..3435e63 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java @@ -26,7 +26,7 @@ import org.apache.calcite.rel.core.Union; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode.Implementor; -public class HiveUnion extends Union { +public class HiveUnion extends Union implements HiveRelNode { public HiveUnion(RelOptCluster cluster, RelTraitSet traits, List inputs) { super(cluster, traits, inputs, true); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterMergeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterMergeRule.java new file mode 100644 index 0000000..ab71f4c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterMergeRule.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.rules.FilterMergeRule; +import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; + +public class HiveFilterMergeRule extends FilterMergeRule { + + public static final HiveFilterMergeRule INSTANCE = + new HiveFilterMergeRule(HiveRelFactories.HIVE_BUILDER); + + /** + * Creates a HiveFilterMergeRule. + */ + public HiveFilterMergeRule(RelBuilderFactory relBuilderFactory) { + super(relBuilderFactory); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final HiveFilter filter = call.rel(0); + super.onMatch(call); + // New plan is absolutely better than old plan + call.getPlanner().setImportance(filter, 0.0); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java index 5c16d8f..4bbe339 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java @@ -19,15 +19,22 @@ import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.RelFactories.FilterFactory; import org.apache.calcite.rel.rules.FilterSetOpTransposeRule; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.tools.RelBuilderFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule { - public HiveFilterSetOpTransposeRule(FilterFactory filterFactory) { - super(filterFactory); + public static final HiveFilterSetOpTransposeRule INSTANCE = + new HiveFilterSetOpTransposeRule(HiveRelFactories.HIVE_BUILDER); + + /** + * Creates a HiveFilterSetOpTransposeRule. + */ + public HiveFilterSetOpTransposeRule(RelBuilderFactory relBuilderFactory) { + super(relBuilderFactory); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java index de880ce..a6a4663 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java @@ -44,9 +44,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; -public final class HiveJoinAddNotNullRule extends RelOptRule { - private static final String NOT_NULL_FUNC_NAME = "isnotnull"; +public final class HiveJoinAddNotNullRule extends RelOptRule { /** The singleton.
*/ public static final HiveJoinAddNotNullRule INSTANCE = diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java index 703c8c6..5ec2fa1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -23,6 +23,7 @@ import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.RelFactories; @@ -35,11 +36,14 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlExplainLevel; import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hive.common.util.AnnotationUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.ImmutableList; @@ -55,6 +59,9 @@ * and applies them appropriately. */ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { + + private static final Logger LOG = LoggerFactory.getLogger(HiveJoinPushTransitivePredicatesRule.class); + private final RelFactories.FilterFactory filterFactory; /** The singleton. */ @@ -64,42 +71,37 @@ public HiveJoinPushTransitivePredicatesRule(Class clazz, RelFactories.FilterFactory filterFactory) { - super(operand(clazz, operand(RelNode.class, any()), - operand(RelNode.class, any()))); + super(operand(clazz, any())); this.filterFactory = filterFactory; } @Override public void onMatch(RelOptRuleCall call) { Join join = call.rel(0); - // Register that we have visited this operator in this rule - HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); - if (registry != null) { - registry.registerVisited(this, join); - } - RelOptPredicateList preds = RelMetadataQuery.getPulledUpPredicates(join); RexBuilder rB = join.getCluster().getRexBuilder(); - RelNode lChild = call.rel(1); - RelNode rChild = call.rel(2); + RelNode lChild = join.getLeft(); + RelNode rChild = join.getRight(); List leftPreds = getValidPreds(join.getCluster(), lChild, preds.leftInferredPredicates, lChild.getRowType()); List rightPreds = getValidPreds(join.getCluster(), rChild, preds.rightInferredPredicates, rChild.getRowType()); - if (leftPreds.isEmpty() && rightPreds.isEmpty()) { + RexNode newLeftPredicate = RexUtil.composeConjunction(rB, leftPreds, false); + RexNode newRightPredicate = RexUtil.composeConjunction(rB, rightPreds, false); + if (newLeftPredicate.isAlwaysTrue() && newRightPredicate.isAlwaysTrue()) { return; } - if (leftPreds.size() > 0) { + if (!newLeftPredicate.isAlwaysTrue()) { RelNode curr = lChild; - lChild = filterFactory.createFilter(lChild, RexUtil.composeConjunction(rB, leftPreds, false)); + lChild = filterFactory.createFilter(lChild, newLeftPredicate); call.getPlanner().onCopy(curr, lChild); } - if (rightPreds.size() > 0) { + if (!newRightPredicate.isAlwaysTrue()) { RelNode curr = rChild; - rChild = filterFactory.createFilter(rChild, 
RexUtil.composeConjunction(rB, rightPreds, false)); + rChild = filterFactory.createFilter(rChild, newRightPredicate); call.getPlanner().onCopy(curr, rChild); } @@ -107,11 +109,6 @@ public HiveJoinPushTransitivePredicatesRule(Class clazz, lChild, rChild, join.getJoinType(), join.isSemiJoinDone()); call.getPlanner().onCopy(join, newRel); - // We register new Join rel so we do not fire the rule on them again - if (registry != null) { - registry.registerVisited(this, newRel); - } - call.transformTo(newRel); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java index d37fc0e..44925a5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -169,13 +169,13 @@ public void onMatch(RelOptRuleCall call) { // 3. If the new conjuncts are already present in the plan, we bail out final List newConjuncts = HiveCalciteUtil.getPredsNotPushedAlready(filter.getInput(), operandsToPushDown); - if (newConjuncts.isEmpty()) { + RexNode newPredicate = RexUtil.composeConjunction(rexBuilder, newConjuncts, false); + if (newPredicate.isAlwaysTrue()) { return; } // 4. Otherwise, we create a new condition - final RexNode newChildFilterCondition = RexUtil.pullFactors(rexBuilder, - RexUtil.composeConjunction(rexBuilder, newConjuncts, false)); + final RexNode newChildFilterCondition = RexUtil.pullFactors(rexBuilder, newPredicate); // 5. We create the new filter that might be pushed down RelNode newChildFilter = filterFactory.createFilter(filter.getInput(), newChildFilterCondition); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java index 50e139b..ea1c56e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java @@ -16,6 +16,15 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; + import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRule; @@ -25,6 +34,7 @@ import org.apache.calcite.rel.core.JoinInfo; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.rules.ValuesReduceRule; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; @@ -51,21 +61,12 @@ import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import java.util.ArrayList; -import java.util.Collections; 
-import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.regex.Pattern; - /** * Collection of planner rules that apply various simplifying transformations on * RexNode trees. Currently, there are two transformations: @@ -221,26 +222,9 @@ public ProjectReduceExpressionsRule(Class projectClass, super(projectClass, relBuilderFactory, "HiveReduceExpressionsRule(Project)"); } - public boolean matches(RelOptRuleCall call) { - Project project = call.rel(0); - HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); - - // If this operator has been visited already by the rule, - // we do not need to apply the optimization - if (registry != null && registry.getVisited(this).contains(project)) { - return false; - } - - return true; - } - @Override public void onMatch(RelOptRuleCall call) { Project project = call.rel(0); // Register that we have visited this operator in this rule - HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); - if (registry != null) { - registry.registerVisited(this, project); - } final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(project.getInput()); final List expList = @@ -248,9 +232,6 @@ public boolean matches(RelOptRuleCall call) { if (reduceExpressions(project, expList, predicates)) { RelNode newProject = call.builder().push(project.getInput()) .project(expList, project.getRowType().getFieldNames()).build(); - if (registry != null) { - registry.registerVisited(this, newProject); - } call.transformTo(newProject); // New plan is absolutely better than old plan. diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDeterministicSelectivityCost.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDeterministicSelectivityCost.java new file mode 100644 index 0000000..261fed3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDeterministicSelectivityCost.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import java.util.List; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.NumberUtil; +import org.apache.calcite.util.Util; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; + +import com.google.common.collect.ImmutableList; + + +public class HiveRelMdDeterministicSelectivityCost { + + private static final HiveRelMdDeterministicSelectivityCost INSTANCE = + new HiveRelMdDeterministicSelectivityCost(); + + public static final RelMetadataProvider SOURCE = + ChainedRelMetadataProvider.of( + ImmutableList.of( + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.NON_CUMULATIVE_COST.method, INSTANCE), + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.DISTINCT_ROW_COUNT.method, INSTANCE), + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.ROW_COUNT.method, INSTANCE))); + + //~ Methods ---------------------------------------------------------------- + + private HiveRelMdDeterministicSelectivityCost() {} + + public RelOptCost getNonCumulativeCost(Join join) { + double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); + double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + double rowCount = leftRCount * rightRCount * + RelMetadataQuery.getSelectivity(join, join.getCondition()); + return HiveCost.FACTORY.makeCost(rowCount, rowCount, 0.0); + } + + public RelOptCost getNonCumulativeCost(Filter filter) { + double dRows = RelMetadataQuery.getRowCount(filter); + double dCpu = RelMetadataQuery.getRowCount(filter.getInput()); + if (dRows == dCpu) { + return HiveCost.FACTORY.makeHugeCost(); + } + return HiveCost.FACTORY.makeCost(dRows, dCpu, 0.0); + } + + public RelOptCost getNonCumulativeCost(Project project) { + double dRows = RelMetadataQuery.getRowCount(project.getInput()); + double dCpu = dRows * project.getChildExps().size(); + return HiveCost.FACTORY.makeCost(dRows, dCpu, 0.0); + } + + public RelOptCost getNonCumulativeCost(Aggregate aggregate) { + double rowCount = RelMetadataQuery.getRowCount(aggregate); + // Aggregates with more aggregate functions cost a bit more + float multiplier = 1f + (float) aggregate.getAggCallList().size() * 0.125f; + for (AggregateCall aggCall : aggregate.getAggCallList()) { + if (aggCall.getAggregation().getName().equals("SUM")) { + // Pretend that SUM costs a little bit more than $SUM0, + // to make things deterministic. 
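+ // Worked illustration (assumed figures, not in the original patch):
+ // an Aggregate with two calls, one of them a SUM, ends up with
+ // multiplier = 1.0 + 2 * 0.125 + 0.0125 = 1.2625,
+ // i.e. cpu = rowCount * 1.2625.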
+ multiplier += 0.0125f; + } + } + return HiveCost.FACTORY.makeCost(rowCount, rowCount * multiplier, 0.0); + } + + public RelOptCost getNonCumulativeCost(Sort sort) { + // Higher cost if rows are wider discourages pushing a project through a + // sort. + double rowCount = RelMetadataQuery.getRowCount(sort); + double bytesPerRow = sort.getRowType().getFieldCount() * 4; + return HiveCost.FACTORY.makeCost(Util.nLogN(rowCount) * bytesPerRow, rowCount, 0.0); + } + + public RelOptCost getNonCumulativeCost(Union union) { + double rowCount = RelMetadataQuery.getRowCount(union); + return HiveCost.FACTORY.makeCost(rowCount, rowCount * 0.1, 0.0); + } + + public RelOptCost getNonCumulativeCost(TableScan tableScan) { + return HiveCost.LARGE; + } + + public RelOptCost getNonCumulativeCost(RelNode rel) { + // by default, assume cost is proportional to number of rows + double rowCount = RelMetadataQuery.getRowCount(rel); + return HiveCost.FACTORY.makeCost(rowCount, rowCount, 0.0); + } + + public Double getDistinctRowCount(TableScan rel, ImmutableBitSet groupKey, + RexNode predicate) { + return HiveCost.LARGE.getRows(); + } + + public Double getRowCount(Filter rel) { + RexNode condition = rel.getCondition(); + List conjuncts = RelOptUtil.conjunctions(condition); + RexNode conditionWithoutPushedPredicates = RexUtil.composeConjunction(rel.getCluster().getRexBuilder(), + HiveCalciteUtil.getPredsNotPushedAlready(rel.getInput(), conjuncts), false); + return NumberUtil.multiply( + RelMetadataQuery.getSelectivity( + rel.getInput(), + conditionWithoutPushedPredicates), + RelMetadataQuery.getRowCount(rel.getInput())); + } + + public Double getRowCount(TableScan rel) { + return HiveCost.LARGE.getRows(); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index b7244fd..ef4fbe2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -17,6 +17,15 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.stats; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; + import org.apache.calcite.linq4j.Linq4j; import org.apache.calcite.linq4j.Ord; import org.apache.calcite.linq4j.function.Predicate1; @@ -39,6 +48,7 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexPermutationShuttle; import org.apache.calcite.rex.RexPermuteInputsShuttle; import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexUtil; @@ -52,6 +62,8 @@ import org.apache.calcite.util.mapping.MappingType; import org.apache.calcite.util.mapping.Mappings; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Function; import com.google.common.collect.HashMultimap; @@ -61,18 +73,12 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; - //TODO: Move this to calcite public class 
HiveRelMdPredicates extends RelMdPredicates { + + private static final Logger LOG = LoggerFactory.getLogger(HiveRelMdPredicates.class); + public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource( BuiltInMethod.PREDICATES.method, new HiveRelMdPredicates()); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 63bf8f2..c6c9b0f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -41,7 +41,6 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelOptPlanner.Executor; -import org.apache.calcite.plan.RelOptQuery; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptSchema; import org.apache.calcite.plan.RelOptUtil; @@ -77,14 +76,12 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexExecutorImpl; import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.schema.SchemaPlus; -import org.apache.calcite.schema.Schemas; import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlExplainLevel; @@ -142,6 +139,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; @@ -861,9 +859,10 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory); HiveVolcanoPlannerContext confContext = new HiveVolcanoPlannerContext(algorithmsConf); RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext); - final RelOptQuery query = new RelOptQuery(planner); final RexBuilder rexBuilder = cluster.getRexBuilder(); - cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder); + cluster = RelOptCluster.create(planner, rexBuilder); + Executor executorProvider = new HiveRexExecutorImpl(cluster); + planner.setExecutor(executorProvider); this.cluster = cluster; this.relOptSchema = relOptSchema; @@ -886,12 +885,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // Create MD provider HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf); - // Create executor - Executor executorProvider = new HiveRexExecutorImpl(cluster); - // 2. Apply pre-join order optimizations calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, - mdProvider.getMetadataProvider(), executorProvider); + mdProvider.getDummyMetadataProvider()); // 3. 
Apply join order optimizations: reordering MST algorithm // If join optimizations failed because of missing stats, we continue with @@ -943,7 +939,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // 4. Run other optimizations that do not need stats perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, UnionMergeRule.INSTANCE, new ProjectMergeRule(false, HiveRelFactories.HIVE_PROJECT_FACTORY), @@ -987,7 +983,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // aggregation columns (HIVE-10627) if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) { perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, HiveWindowingFixRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Window fixing rule"); } @@ -996,7 +992,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); // 8.1. Merge join into multijoin operators (if possible) - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null, + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER, HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER, HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER, @@ -1006,15 +1002,15 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveRelFactories.HIVE_BUILDER.create(cluster, null)); calciteOptimizedPlan = fieldTrimmer.trim(calciteOptimizedPlan); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, new ProjectMergeRule(false, HiveRelFactories.HIVE_PROJECT_FACTORY)); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null, + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), new HiveFilterProjectTSTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, HiveTableScan.class)); // 8.2. 
Introduce exchange operators below join/multijoin operators - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Translation from Calcite tree to Hive tree"); @@ -1043,7 +1039,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu * executor * @return */ - private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider, Executor executorProvider) { + private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { // TODO: Decorelation of subquery should be done before attempting // Partition Pruning; otherwise Expression evaluation may try to execute // corelated sub query. @@ -1058,7 +1054,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv // Its not clear, if this rewrite is always performant on MR, since extra map phase // introduced for 2nd MR job may offset gains of this multi-stage aggregation. // We need a cost model for MR to enable this on MR. - basePlan = hepPlan(basePlan, true, mdProvider, null, HiveExpandDistinctAggregatesRule.INSTANCE); + basePlan = hepPlan(basePlan, true, mdProvider, HiveExpandDistinctAggregatesRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite"); } @@ -1069,39 +1065,43 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.ARBITRARY, + basePlan = hepPlan(basePlan, false, mdProvider, HepMatchOrder.ARBITRARY, HivePreFilteringRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF"); // 3. PPD for old Join Syntax // NOTE: PPD needs to run before adding not null filters in order to - // support old style join syntax (so that on-clauses will get filled up). - // TODO: Add in ReduceExpressionrules (Constant folding) to below once - // HIVE-11927 is fixed. 
+ // support old style join syntax (so that on-clauses will get filled up) perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, + basePlan = hepPlan(basePlan, true, mdProvider, HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, HiveFilterSetOpTransposeRule.INSTANCE, HiveFilterSortTransposeRule.INSTANCE, HiveFilterJoinRule.JOIN, HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, - HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( - HiveRelFactories.HIVE_FILTER_FACTORY)); + HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), HiveFilterMergeRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, PPD for old join syntax"); + // 4. Add not null filters + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, add not null filters"); - // TODO: Transitive inference, constant prop & Predicate push down has to - // do multiple passes till no more inference is left - // Currently doing so would result in a spin. Just checking for if inferred - // pred is present below may not be sufficient as inferred & pushed pred - // could have been mutated by constant folding/prop - // 4. Transitive inference for join on clauses + // 5. PPD, join transitive inference, constant folding perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, new HiveJoinPushTransitivePredicatesRule( - Join.class, HiveRelFactories.HIVE_FILTER_FACTORY)); + basePlan = volcanoPlan(basePlan, mdProvider, + HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, HiveFilterSetOpTransposeRule.INSTANCE, + HiveFilterSortTransposeRule.INSTANCE, HiveFilterJoinRule.JOIN, HiveFilterJoinRule.FILTER_ON_JOIN, + new HiveFilterAggregateTransposeRule(Filter.class, + HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), + HiveFilterMergeRule.INSTANCE, + new HiveJoinPushTransitivePredicatesRule(Join.class, HiveRelFactories.HIVE_FILTER_FACTORY), + HiveReduceExpressionsRule.PROJECT_INSTANCE, HiveReduceExpressionsRule.FILTER_INSTANCE, + HiveReduceExpressionsRule.JOIN_INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Transitive inference for join on clauses"); + "Calcite: Prejoin ordering transformation, PPD, join transitive inference, constant folding"); - // 5. Push down limit through outer join + // 6. Push down limit through outer join // NOTE: We run this after PPD to support old style join syntax. 
// Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10 @@ -1113,56 +1113,30 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE); final long reductionTuples = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES); - basePlan = hepPlan(basePlan, true, mdProvider, null, HiveSortMergeRule.INSTANCE, + basePlan = hepPlan(basePlan, true, mdProvider, HiveSortMergeRule.INSTANCE, HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE, HiveSortUnionReduceRule.INSTANCE); - basePlan = hepPlan(basePlan, true, mdProvider, null, HepMatchOrder.BOTTOM_UP, + basePlan = hepPlan(basePlan, true, mdProvider, HepMatchOrder.BOTTOM_UP, new HiveSortRemoveRule(reductionProportion, reductionTuples), HiveProjectSortTransposeRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Push down limit through outer join"); } - // 6. Add not null filters - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, HiveJoinAddNotNullRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Add not null filters"); - - // 7. Rerun Constant propagation and PPD now that we have added Not NULL filters & did transitive inference - // TODO: Add in ReduceExpressionrules (Constant folding) to below once - // HIVE-11927 is fixed. + // 7. Push Down Semi Joins perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, - HiveFilterSetOpTransposeRule.INSTANCE, HiveFilterSortTransposeRule.INSTANCE, HiveFilterJoinRule.JOIN, - HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, - HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( - HiveRelFactories.HIVE_FILTER_FACTORY)); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Constant propagation and PPD"); - - // 8. Push Down Semi Joins - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, SemiJoinJoinTransposeRule.INSTANCE, + basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Push Down Semi Joins"); - // 9. Constant folding - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, - HiveReduceExpressionsRule.PROJECT_INSTANCE, HiveReduceExpressionsRule.FILTER_INSTANCE, - HiveReduceExpressionsRule.JOIN_INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Constant folding"); - - // 10. Apply Partition Pruning + // 8. 
Apply Partition Pruning perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, null, new HivePartitionPruneRule(conf)); + basePlan = hepPlan(basePlan, false, mdProvider, new HivePartitionPruneRule(conf)); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Partition Pruning"); - // 11. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP) + // 9. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP) perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveRelFactories.HIVE_BUILDER.create(cluster, null)); @@ -1170,19 +1144,19 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Projection Pruning"); - // 12. Merge Project-Project if possible + // 10. Merge Project-Project if possible perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, null, new ProjectMergeRule(true, + basePlan = hepPlan(basePlan, false, mdProvider, new ProjectMergeRule(true, HiveRelFactories.HIVE_PROJECT_FACTORY)); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Merge Project-Project"); - // 13. Rerun PPD through Project as column pruning would have introduced + // 11. Rerun PPD through Project as column pruning would have introduced // DT above scans; By pushing filter just above TS, Hive can push it into // storage (incase there are filters on non partition cols). This only // matches FIL-PROJ-TS perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, new HiveFilterProjectTSTransposeRule( + basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTSTransposeRule( Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, HiveTableScan.class)); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, @@ -1202,8 +1176,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv * @return optimized RelNode */ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, - RelMetadataProvider mdProvider, Executor executorProvider, RelOptRule... rules) { - return hepPlan(basePlan, followPlanChanges, mdProvider, executorProvider, + RelMetadataProvider mdProvider, RelOptRule... rules) { + return hepPlan(basePlan, followPlanChanges, mdProvider, HepMatchOrder.TOP_DOWN, rules); } @@ -1219,7 +1193,7 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, * @return optimized RelNode */ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, - RelMetadataProvider mdProvider, Executor executorProvider, HepMatchOrder order, + RelMetadataProvider mdProvider, HepMatchOrder order, RelOptRule... 
rules) { RelNode optimizedRelNode = basePlan; @@ -1244,16 +1218,41 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, basePlan.getCluster().setMetadataProvider( new CachingRelMetadataProvider(chainedProvider, planner)); - if (executorProvider != null) { - basePlan.getCluster().getPlanner().setExecutor(executorProvider); - } - planner.setRoot(basePlan); optimizedRelNode = planner.findBestExp(); return optimizedRelNode; } + private RelNode volcanoPlan(RelNode basePlan, RelMetadataProvider mdProvider, + RelOptRule... rules) { + + RelNode optimizedRelNode = basePlan; + + // Metadata providers + List list = Lists.newArrayList(); + list.add(mdProvider); + basePlan.getCluster().getPlanner().registerMetadataProviders(list); + RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); + basePlan.getCluster().setMetadataProvider( + new CachingRelMetadataProvider(chainedProvider, basePlan.getCluster().getPlanner())); + + // Register rules + for (int i = 0; i < rules.length; i++) { + basePlan.getCluster().getPlanner().addRule(rules[i]); + } + + basePlan.getCluster().getPlanner().setRoot(basePlan); + optimizedRelNode = basePlan.getCluster().getPlanner().findBestExp(); + + // Unregister rules + for (int i = 0; i < rules.length; i++) { + basePlan.getCluster().getPlanner().removeRule(rules[i]); + } + + return optimizedRelNode; + } + @SuppressWarnings("nls") private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel, String rightalias, RelNode rightRel) throws SemanticException { diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out index b158d85..c4d59c8 100644 --- ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -925,46 +925,24 @@ POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 24 explain select x from (select i1,11.0 as x from alltypes_orc limit 10) temp POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypes_orc - Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Reduce Operator Tree: - Limit - Number of rows: 10 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 11.0 (type: double) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Limit + Number of rows: 10 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 
- Fetch Operator - limit: -1 - Processor Tree: - ListSink + ListSink PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 104 -- outer select - numRows: 2 rawDataSize: 186 @@ -1046,21 +1024,21 @@ STAGE PLANS: alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 10 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1074,12 +1052,12 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 10 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'hello' (type: string), 11.0 (type: double) outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/auto_join12.q.out ql/src/test/results/clientpositive/auto_join12.q.out index 8ef3664..27858e7 100644 --- ql/src/test/results/clientpositive/auto_join12.q.out +++ ql/src/test/results/clientpositive/auto_join12.q.out @@ -76,12 +76,12 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) < 100.0) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -91,11 +91,11 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hash(_col0,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column 
stats: NONE Group By Operator aggregations: sum(_col0) mode: hash diff --git ql/src/test/results/clientpositive/auto_join16.q.out ql/src/test/results/clientpositive/auto_join16.q.out index c1da6d2..e969e85 100644 --- ql/src/test/results/clientpositive/auto_join16.q.out +++ ql/src/test/results/clientpositive/auto_join16.q.out @@ -50,7 +50,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(key) > 10.0)) (type: boolean) + predicate: (((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) > 20.0)) (type: boolean) Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/cast1.q.out ql/src/test/results/clientpositive/cast1.q.out index 48a0c14..0bdecba 100644 --- ql/src/test/results/clientpositive/cast1.q.out +++ ql/src/test/results/clientpositive/cast1.q.out @@ -105,11 +105,11 @@ POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT 3 + 2, 3.0 + 2, 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.c1 SIMPLE [] -POSTHOOK: Lineage: dest1.c2 SIMPLE [] -POSTHOOK: Lineage: dest1.c3 SIMPLE [] -POSTHOOK: Lineage: dest1.c4 SIMPLE [] -POSTHOOK: Lineage: dest1.c5 SIMPLE [] +POSTHOOK: Lineage: dest1.c1 EXPRESSION [] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [] +POSTHOOK: Lineage: dest1.c5 EXPRESSION [] POSTHOOK: Lineage: dest1.c6 EXPRESSION [] POSTHOOK: Lineage: dest1.c7 EXPRESSION [] PREHOOK: query: select dest1.* FROM dest1 diff --git ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out index eef2389..81dd1c1 100644 --- ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out +++ ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out @@ -1078,7 +1078,9 @@ limit 5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1091,18 +1093,62 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + 
Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -1110,18 +1156,28 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1143,7 +1199,9 @@ limit 5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1156,18 +1214,62 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE 
Column stats: NONE - Union - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -1175,18 +1277,28 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reduce Operator Tree: + 
Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/cbo_const.q.out ql/src/test/results/clientpositive/cbo_const.q.out index adc5232..518e246 100644 --- ql/src/test/results/clientpositive/cbo_const.q.out +++ ql/src/test/results/clientpositive/cbo_const.q.out @@ -162,7 +162,7 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 4.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: '4.0' (type: string) + expressions: '4' (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -243,9 +243,9 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '3.0' (type: string) + key expressions: '3' (type: string) sort order: + - Map-reduce partition columns: '3.0' (type: string) + Map-reduce partition columns: '3' (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan diff --git ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out index 1b2a2ab..b80c1f5 100644 --- ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out +++ ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out @@ -402,7 +402,7 @@ PREHOOK: query: select 3 * 5 from dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","database":"default","hash":"753abad4d55afd3df34fdc73abfcd44d","queryText":"select 3 * 5 from dest1","edges":[{"sources":[],"targets":[0],"expression":"15","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"_c0"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"753abad4d55afd3df34fdc73abfcd44d","queryText":"select 3 * 5 from dest1","edges":[{"sources":[],"targets":[0],"expression":"(3 * 5)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"_c0"}]} 15 15 15 @@ -523,7 +523,7 @@ PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: database:default PREHOOK: Output: default@dest3 -{"version":"1.0","engine":"mr","database":"default","hash":"a2c4e9a3ec678039814f5d84b1e38ce4","queryText":"create table dest3 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 1) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 1) 
and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"a2c4e9a3ec678039814f5d84b1e38ce4","queryText":"create table dest3 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 1) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"(src2.key2 is not null and (length(src2.key2) > 1))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3 PREHOOK: type: QUERY @@ -659,7 +659,7 @@ PREHOOK: Input: default@dest_l2 PREHOOK: Input: default@dest_l3 PREHOOK: Output: database:default PREHOOK: Output: default@t -{"version":"1.0","engine":"mr","database":"default","hash":"0d2f15b494111ffe236d5be42a76fa28","queryText":"create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"((a.id > 0) and a.id is not null)","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1],"expression":"((b.c3 = 15) and (b.id > 0) and b.id is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"0d2f15b494111ffe236d5be42a76fa28","queryText":"create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 
15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"((a.id > 0) and a.id is not null)","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1],"expression":"((b.c3 = 15) and b.id is not null and (b.id > 0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: SELECT substr(src1.key,1,1), count(DISTINCT substr(src1.value,5)), concat(substr(src1.key,1,1),sum(substr(src1.value,5))) from src1 diff --git ql/src/test/results/clientpositive/correlationoptimizer13.q.out ql/src/test/results/clientpositive/correlationoptimizer13.q.out index 61b7bcb..048f63b 100644 --- ql/src/test/results/clientpositive/correlationoptimizer13.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer13.q.out @@ -162,7 +162,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((c2 > 100) and (c1 < 120)) and c3 is not null) (type: boolean) + predicate: (((c2 > 100) and c3 is not null) and (c1 < 120)) (type: boolean) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c3 (type: string), c1 (type: int) diff --git ql/src/test/results/clientpositive/correlationoptimizer8.q.out ql/src/test/results/clientpositive/correlationoptimizer8.q.out index 368a114..ba54b87 100644 --- ql/src/test/results/clientpositive/correlationoptimizer8.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer8.q.out @@ -103,7 +103,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) + predicate: (key is not null and ((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0))) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -290,7 +290,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) + predicate: (key is not null and ((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0))) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -963,7 +963,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) + predicate: (key is not null and ((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0))) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out index 141bcd8..24ac550 100644 --- ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out @@ -1560,7 +1560,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int), 'day' (type: string) + expressions: UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1642,9 +1642,8 @@ group by "day", key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -1666,7 +1665,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: @@ -1677,39 +1676,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int), 'day' (type: string) + expressions: UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.hive13_dp1 + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: 
default.hive13_dp1 Stage: Stage-0 Move Operator @@ -1723,7 +1700,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.hive13_dp1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: insert overwrite table `hive13_dp1` partition(`day`) diff --git ql/src/test/results/clientpositive/filter_cond_pushdown.q.out ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index 5e0edbc..a29dedd 100644 --- ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -59,7 +59,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col1 = '2008-04-08') and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09')) (type: boolean) + predicate: (_col3 is not null and (((_col1 = '2008-04-08') and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09'))) (type: boolean) Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col3 (type: string) @@ -185,7 +185,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col1) IN ('2008-04-08', '2008-04-10') and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09')) (type: boolean) + predicate: (_col3 is not null and (((_col1) IN ('2008-04-08', '2008-04-10') and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09'))) (type: boolean) Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col3 (type: string) @@ -317,7 +317,7 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double), _col0 (type: string) - 1 UDFToDouble(_col1) (type: double), _col0 (type: string) + 1 UDFToDouble(1) (type: double), _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col5 Statistics: Num rows: 22 Data size: 288 Basic stats: COMPLETE Column stats: NONE Filter Operator diff --git ql/src/test/results/clientpositive/filter_join_breaktask.q.out ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 13d17aa..a41fa48 100644 --- ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -168,7 +168,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((value is not null and (value <> '')) and key is not null) (type: boolean) + predicate: (((value <> '') and key is not null) and value is not null) (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) diff --git ql/src/test/results/clientpositive/groupby_position.q.out ql/src/test/results/clientpositive/groupby_position.q.out index c2566f2..57be001 100644 --- ql/src/test/results/clientpositive/groupby_position.q.out +++ ql/src/test/results/clientpositive/groupby_position.q.out @@ -647,7 +647,7 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 10.0)) (type: boolean) + predicate: ((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) (type: 
boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/groupby_ppd.q.out ql/src/test/results/clientpositive/groupby_ppd.q.out index e3e4a50..6164a26 100644 --- ql/src/test/results/clientpositive/groupby_ppd.q.out +++ ql/src/test/results/clientpositive/groupby_ppd.q.out @@ -28,16 +28,16 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: foo (type: int) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Union Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col1 + expressions: 1 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: 1 (type: int), _col1 (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -54,16 +54,16 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: foo (type: int) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Union Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col1 + expressions: 1 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: 1 (type: int), _col1 (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -79,7 +79,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: int), 1 (type: int) + expressions: _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index ceecbb9..7333677 100644 --- ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -1510,7 +1510,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1942,7 +1942,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), 1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) + expressions: 
UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4639,7 +4639,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), 1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5054,7 +5054,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), 1 (type: int), _col2 (type: string), 2 (type: int), UDFToInteger(_col4) (type: int) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5443,7 +5443,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), 1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5901,7 +5901,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), 2 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/index_auto_mult_tables.q.out ql/src/test/results/clientpositive/index_auto_mult_tables.q.out index 8c71925..31b888e 100644 --- ql/src/test/results/clientpositive/index_auto_mult_tables.q.out +++ ql/src/test/results/clientpositive/index_auto_mult_tables.q.out @@ -38,7 +38,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) + predicate: ((((UDFToDouble(key) < 90.0) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -277,10 +277,10 @@ STAGE PLANS: value expressions: _col1 (type: string) TableScan alias: b - filterExpr: ((((UDFToDouble(key) > 
70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) + filterExpr: ((((UDFToDouble(key) < 90.0) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) + predicate: ((((UDFToDouble(key) < 90.0) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -313,9 +313,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default__srcpart_srcpart_index_bitmap__ - filterExpr: (((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) + filterExpr: (((((UDFToDouble(key) < 90.0) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator - predicate: (((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) + predicate: (((((UDFToDouble(key) < 90.0) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _bucketname, _offset diff --git ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out index b3e6989..d0fc22d 100644 --- ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out +++ ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out @@ -38,7 +38,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) + predicate: ((((UDFToDouble(key) < 90.0) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -277,10 +277,10 @@ STAGE PLANS: value expressions: _col1 (type: string) TableScan alias: b - filterExpr: ((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) + filterExpr: ((((UDFToDouble(key) < 90.0) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) + predicate: ((((UDFToDouble(key) < 90.0) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) Statistics: Num 
rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -341,9 +341,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default__srcpart_srcpart_index_compact__ - filterExpr: ((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) + filterExpr: ((((UDFToDouble(key) < 90.0) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) Filter Operator - predicate: ((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) + predicate: ((((UDFToDouble(key) < 90.0) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) Select Operator expressions: _bucketname (type: string), _offsets (type: array<bigint>) outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/input_part1.q.out ql/src/test/results/clientpositive/input_part1.q.out index c5c46af..d6f4d3e 100644 --- ql/src/test/results/clientpositive/input_part1.q.out +++ ql/src/test/results/clientpositive/input_part1.q.out @@ -365,8 +365,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.ds SIMPLE [] -POSTHOOK: Lineage: dest1.hr SIMPLE [] +POSTHOOK: Lineage: dest1.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 diff --git ql/src/test/results/clientpositive/input_part5.q.out ql/src/test/results/clientpositive/input_part5.q.out index c6ae2fd..f2d7335 100644 --- ql/src/test/results/clientpositive/input_part5.q.out +++ ql/src/test/results/clientpositive/input_part5.q.out @@ -114,7 +114,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@tmptable POSTHOOK: Lineage: tmptable.ds SIMPLE [(srcpart)x.FieldSchema(name:hr, type:string, comment:null), ] -POSTHOOK: Lineage: tmptable.hr SIMPLE [] +POSTHOOK: Lineage: tmptable.hr SIMPLE [(srcpart)x.FieldSchema(name:ds, type:string, comment:null), ] POSTHOOK: Lineage: tmptable.key SIMPLE [(srcpart)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmptable.value SIMPLE [(srcpart)x.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from tmptable x sort by x.key,x.value,x.ds,x.hr diff --git ql/src/test/results/clientpositive/input_part6.q.out ql/src/test/results/clientpositive/input_part6.q.out index c01d8af..fa51cdf 100644 --- ql/src/test/results/clientpositive/input_part6.q.out +++ ql/src/test/results/clientpositive/input_part6.q.out @@ -19,7 +19,7 @@ STAGE PLANS: predicate: (UDFToDouble(ds) = 1996.0) (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string), '1996.0' (type: string), hr (type: string) + expressions: key (type: string), value (type: string), '1996' (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Limit diff --git ql/src/test/results/clientpositive/join12.q.out ql/src/test/results/clientpositive/join12.q.out index 8217c86..87b2a8e 100644 --- ql/src/test/results/clientpositive/join12.q.out +++ ql/src/test/results/clientpositive/join12.q.out @@ -66,17 +66,17 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) < 100.0) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -88,14 +88,14 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/join16.q.out ql/src/test/results/clientpositive/join16.q.out index 244eb46..2943464 100644 --- ql/src/test/results/clientpositive/join16.q.out +++ ql/src/test/results/clientpositive/join16.q.out @@ -29,7 +29,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(key) > 10.0)) (type: boolean) + predicate: (((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) > 20.0)) (type: boolean) Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/join34.q.out ql/src/test/results/clientpositive/join34.q.out index 795dd3a..9226230 100644 --- ql/src/test/results/clientpositive/join34.q.out +++ ql/src/test/results/clientpositive/join34.q.out @@ -159,7 +159,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and 
key is not null) (type: boolean) + predicate: (key is not null and ((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0))) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/join42.q.out ql/src/test/results/clientpositive/join42.q.out index 6e09e38..1592a93 100644 --- ql/src/test/results/clientpositive/join42.q.out +++ ql/src/test/results/clientpositive/join42.q.out @@ -135,7 +135,7 @@ STAGE PLANS: alias: la Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((loan_id = 4436) and aid is not null) and pi_id is not null) (type: boolean) + predicate: ((aid is not null and pi_id is not null) and (loan_id = 4436)) (type: boolean) Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: aid (type: int), pi_id (type: int) diff --git ql/src/test/results/clientpositive/limit_join_transpose.q.out ql/src/test/results/clientpositive/limit_join_transpose.q.out index 759aebb..b84ca3f 100644 --- ql/src/test/results/clientpositive/limit_join_transpose.q.out +++ ql/src/test/results/clientpositive/limit_join_transpose.q.out @@ -306,13 +306,14 @@ on src1.key = src2.key limit 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -345,7 +346,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -377,23 +378,43 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: 
string), VALUE._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -407,29 +428,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1234,13 +1258,14 @@ on src1.key = src2.key limit 1 offset 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1275,7 +1300,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1307,23 +1332,45 @@ STAGE PLANS: 1 _col1 (type: string) 
outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1337,30 +1384,33 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Offset of rows: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + 
Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/lineage2.q.out ql/src/test/results/clientpositive/lineage2.q.out index a189f82..ea2084d 100644 --- ql/src/test/results/clientpositive/lineage2.q.out +++ ql/src/test/results/clientpositive/lineage2.q.out @@ -402,7 +402,7 @@ PREHOOK: query: select 3 * 5 from dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","database":"default","hash":"753abad4d55afd3df34fdc73abfcd44d","queryText":"select 3 * 5 from dest1","edges":[{"sources":[],"targets":[0],"expression":"15","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"c0"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"753abad4d55afd3df34fdc73abfcd44d","queryText":"select 3 * 5 from dest1","edges":[{"sources":[],"targets":[0],"expression":"(3 * 5)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"c0"}]} 15 15 15 @@ -523,14 +523,14 @@ PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: database:default PREHOOK: Output: default@dest3 -{"version":"1.0","engine":"mr","database":"default","hash":"a2c4e9a3ec678039814f5d84b1e38ce4","queryText":"create table dest3 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 1) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 1) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"a2c4e9a3ec678039814f5d84b1e38ce4","queryText":"create table dest3 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 
1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 1) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"(src2.key2 is not null and (length(src2.key2) > 1))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"mr","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"(src2.key2 is not null and (length(src2.key2) > 
3))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE @@ -659,7 +659,7 @@ PREHOOK: Input: default@dest_l2 PREHOOK: Input: default@dest_l3 PREHOOK: Output: database:default PREHOOK: Output: default@t -{"version":"1.0","engine":"mr","database":"default","hash":"0d2f15b494111ffe236d5be42a76fa28","queryText":"create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"((a.id > 0) and a.id is not null)","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1],"expression":"((b.c3 = 15) and (b.id > 0) and b.id is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"0d2f15b494111ffe236d5be42a76fa28","queryText":"create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"((a.id > 0) and a.id is not null)","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1],"expression":"((b.c3 = 15) and b.id is not null and (b.id > 0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: SELECT substr(src1.key,1,1), count(DISTINCT substr(src1.value,5)), concat(substr(src1.key,1,1),sum(substr(src1.value,5))) from src1 diff --git ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out index 48cc6ea..be77ba8 100644 --- ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out @@ -690,10 
+690,12 @@ STAGE PLANS: predicate: (x = 484) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: 484 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: 484 (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -764,32 +766,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 484 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/mergejoin.q.out ql/src/test/results/clientpositive/mergejoin.q.out index acf816e..b794f1a 100644 --- ql/src/test/results/clientpositive/mergejoin.q.out +++ ql/src/test/results/clientpositive/mergejoin.q.out @@ -2681,14 +2681,12 @@ NULL NULL NULL 98 val_98 2008-04-08 PREHOOK: query: select * from (select * from tab where tab.key = 0)a right outer join (select * from tab_part where tab_part.key = 98)b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@tab -PREHOOK: Input: default@tab@ds=2008-04-08 PREHOOK: Input: default@tab_part PREHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: select * from (select * from tab where tab.key = 0)a right outer join (select * from tab_part where tab_part.key = 98)b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tab -POSTHOOK: Input: default@tab@ds=2008-04-08 POSTHOOK: Input: default@tab_part POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### @@ -2700,7 +2698,6 @@ full outer join (select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = b.key and b.key = c.key PREHOOK: type: QUERY 
PREHOOK: Input: default@tab -PREHOOK: Input: default@tab@ds=2008-04-08 PREHOOK: Input: default@tab_part PREHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### @@ -2710,7 +2707,6 @@ full outer join (select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = b.key and b.key = c.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tab -POSTHOOK: Input: default@tab@ds=2008-04-08 POSTHOOK: Input: default@tab_part POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### @@ -2720,7 +2716,6 @@ full outer join (select * from tab_part where tab_part.key = 98)b on a.key = b.key join tab_part c on b.key = c.key PREHOOK: type: QUERY PREHOOK: Input: default@tab -PREHOOK: Input: default@tab@ds=2008-04-08 PREHOOK: Input: default@tab_part PREHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### @@ -2730,7 +2725,6 @@ full outer join (select * from tab_part where tab_part.key = 98)b on a.key = b.key join tab_part c on b.key = c.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tab -POSTHOOK: Input: default@tab@ds=2008-04-08 POSTHOOK: Input: default@tab_part POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### @@ -3271,7 +3265,6 @@ join (select * from tab_part where tab_part.key = 98)b on a.key = b.key full outer join tab_part c on b.key = c.key PREHOOK: type: QUERY PREHOOK: Input: default@tab -PREHOOK: Input: default@tab@ds=2008-04-08 PREHOOK: Input: default@tab_part PREHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### @@ -3281,7 +3274,6 @@ join (select * from tab_part where tab_part.key = 98)b on a.key = b.key full outer join tab_part c on b.key = c.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tab -POSTHOOK: Input: default@tab@ds=2008-04-08 POSTHOOK: Input: default@tab_part POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/mergejoins.q.out ql/src/test/results/clientpositive/mergejoins.q.out index 9010410..e21ff0e 100644 --- ql/src/test/results/clientpositive/mergejoins.q.out +++ ql/src/test/results/clientpositive/mergejoins.q.out @@ -45,7 +45,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -55,7 +56,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (val1 is not null and val2 is not null) (type: boolean) + predicate: val1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: val1 (type: int), val2 (type: int) @@ -99,6 +100,37 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col0 is not null and _col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false 
+ table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) TableScan alias: d Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -119,15 +151,11 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - 3 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -135,7 +163,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -143,7 +171,7 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) TableScan alias: e @@ -169,10 +197,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -233,16 +261,19 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: 
string) Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/mergejoins_mixed.q.out ql/src/test/results/clientpositive/mergejoins_mixed.q.out index 10f37f9..197e6e6 100644 --- ql/src/test/results/clientpositive/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/mergejoins_mixed.q.out @@ -61,29 +61,35 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: a Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -160,16 +166,19 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: a Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -460,16 +469,19 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out index 90032fe..f6d8388 100644 --- ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out +++ ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out @@ -768,23 +768,23 @@ STAGE PLANS: alias: orc_pred Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean) + Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 Statistics: Num rows: 3 Data size: 888 Basic stats: COMPLETE Column stats: NONE @@ -834,26 +834,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean) + filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean) Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((d >= 10.0) and (d < 12.0)) and 
(s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean) + Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 Statistics: Num rows: 3 Data size: 888 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out index 7c5be6d..b322ef1 100644 --- ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out +++ ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out @@ -756,23 +756,23 @@ STAGE PLANS: alias: tbl_pred Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean) + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column 
stats: NONE Limit Number of rows: 3 Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE @@ -822,26 +822,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean) + filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean) Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean) + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/pointlookup2.q.out ql/src/test/results/clientpositive/pointlookup2.q.out index 1d7efe8..a442425 100644 --- ql/src/test/results/clientpositive/pointlookup2.q.out +++ ql/src/test/results/clientpositive/pointlookup2.q.out @@ -68,7 +68,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Output: default@pcr_t2 -POSTHOOK: Lineage: pcr_t2.ds SIMPLE [] +POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ] POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: from pcr_t1 @@ -83,8 +83,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Output: default@pcr_t2 -POSTHOOK: Lineage: pcr_t2.ds SIMPLE [] -POSTHOOK: Lineage: pcr_t2.key SIMPLE [] +POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, 
comment:null), ] PREHOOK: query: explain extended select key, value, ds diff --git ql/src/test/results/clientpositive/ppd_join2.q.out ql/src/test/results/clientpositive/ppd_join2.q.out index c503652..a56f66c 100644 --- ql/src/test/results/clientpositive/ppd_join2.q.out +++ ql/src/test/results/clientpositive/ppd_join2.q.out @@ -39,7 +39,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and value is not null) (type: boolean) + predicate: (((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and value is not null) and (key <> '305')) and (key <> '14')) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -1723,7 +1723,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and value is not null) (type: boolean) + predicate: (((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and value is not null) and (key <> '305')) and (key <> '14')) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/ppd_join3.q.out ql/src/test/results/clientpositive/ppd_join3.q.out index 0000db1..f590fe0 100644 --- ql/src/test/results/clientpositive/ppd_join3.q.out +++ ql/src/test/results/clientpositive/ppd_join3.q.out @@ -39,7 +39,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and (key <> '13')) and (key <> '1')) (type: boolean) + predicate: ((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -76,7 +76,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) + predicate: (((_col1 > '10') or (_col0 <> '10')) and (_col0 <> '13') and (_col0 <> '1')) (type: boolean) Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) @@ -1779,7 +1779,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and (key <> '13')) and (key <> '1')) (type: boolean) + predicate: ((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and 
(key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -1816,7 +1816,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) + predicate: (((_col1 > '10') or (_col0 <> '10')) and (_col0 <> '13') and (_col0 <> '1')) (type: boolean) Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) diff --git ql/src/test/results/clientpositive/ppd_join_filter.q.out ql/src/test/results/clientpositive/ppd_join_filter.q.out index e3b19d4..18d2457 100644 --- ql/src/test/results/clientpositive/ppd_join_filter.q.out +++ ql/src/test/results/clientpositive/ppd_join_filter.q.out @@ -132,9 +132,9 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(key) @@ -209,7 +209,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean) + predicate: (((UDFToDouble(_col1) + 1.0) < 5.0) and _col0 is not null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) @@ -546,9 +546,9 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(key) @@ -623,7 +623,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean) + predicate: (((UDFToDouble(_col1) + 1.0) < 5.0) and _col0 is not null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) @@ -960,9 +960,9 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(key) @@ -1037,7 +1037,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean) + predicate: (((UDFToDouble(_col1) + 1.0) < 5.0) and _col0 is 
not null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) @@ -1374,9 +1374,9 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(key) @@ -1451,7 +1451,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean) + predicate: (((UDFToDouble(_col1) + 1.0) < 5.0) and _col0 is not null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) diff --git ql/src/test/results/clientpositive/ppd_outer_join4.q.out ql/src/test/results/clientpositive/ppd_outer_join4.q.out index 289798c..d90da49 100644 --- ql/src/test/results/clientpositive/ppd_outer_join4.q.out +++ ql/src/test/results/clientpositive/ppd_outer_join4.q.out @@ -38,7 +38,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((sqrt(key) <> 13.0) and (key < '25')) and (key > '15')) and (key < '20')) and (key > '10')) (type: boolean) + predicate: (((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key < '25')) and (key > '15')) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -53,7 +53,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((((key > '10') and (key < '20')) and (key < '25')) and (key > '15')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -69,7 +69,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) (type: boolean) + predicate: (((((key < '25') and (key > '15')) and (key > '10')) and (key < '20')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -402,7 +402,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((sqrt(key) <> 13.0) and (key < '25')) and (key > '15')) and (key < '20')) and (key > '10')) (type: boolean) + predicate: (((((sqrt(key) <> 13.0) and (key > '10')) and (key < '20')) and (key < '25')) and (key > '15')) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: 
string) @@ -417,7 +417,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((((key > '10') and (key < '20')) and (key < '25')) and (key > '15')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -433,7 +433,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) (type: boolean) + predicate: (((((key < '25') and (key > '15')) and (key > '10')) and (key < '20')) and (sqrt(key) <> 13.0)) (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/ppd_union_view.q.out ql/src/test/results/clientpositive/ppd_union_view.q.out index ba51cbd..182c0eb 100644 --- ql/src/test/results/clientpositive/ppd_union_view.q.out +++ ql/src/test/results/clientpositive/ppd_union_view.q.out @@ -541,7 +541,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((ds = '2011-10-15') and keymap is not null) (type: boolean) + predicate: (keymap is not null and (ds = '2011-10-15')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: string), keymap (type: string) diff --git ql/src/test/results/clientpositive/quotedid_basic.q.out ql/src/test/results/clientpositive/quotedid_basic.q.out index 29736af..519f647 100644 --- ql/src/test/results/clientpositive/quotedid_basic.q.out +++ ql/src/test/results/clientpositive/quotedid_basic.q.out @@ -101,11 +101,11 @@ STAGE PLANS: predicate: (!@#$%^&*()_q = '1') (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: x+1 (type: string), y&y (type: string) - outputColumnNames: _col0, _col1 + expressions: x+1 (type: string), y&y (type: string), '1' (type: string) + outputColumnNames: x+1, y&y, !@#$%^&*()_q Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: string), '1' (type: string) + keys: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -120,17 +120,13 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -160,11 +156,11 @@ STAGE PLANS: predicate: (!@#$%^&*()_q = '1') (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: x+1 (type: string), y&y (type: string) - outputColumnNames: _col0, _col1 + expressions: x+1 (type: string), y&y (type: string), '1' (type: string) + outputColumnNames: x+1, y&y, !@#$%^&*()_q Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: string), '1' (type: string) + keys: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -191,27 +187,27 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: '1' (type: string), _col1 (type: string) + key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: '1' (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string + output shape: _col0: string, _col1: string, _col2: string type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction order by: _col1 - partition by: '1' + partition by: _col2 raw input shape: window functions: window function definition @@ -223,7 +219,7 @@ STAGE PLANS: isPivotResult: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), '1' (type: string), rank_window_0 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -264,11 +260,11 @@ STAGE PLANS: predicate: (!@#$%^&*()_q = '1') (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: x+1 (type: string), y&y (type: string) - outputColumnNames: _col0, _col1 + expressions: x+1 (type: string), y&y (type: string), '1' (type: string) + outputColumnNames: x+1, y&y, !@#$%^&*()_q Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: string), '1' (type: string) + keys: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -295,27 +291,27 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: '1' (type: string), _col1 
(type: string) + key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: '1' (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string + output shape: _col0: string, _col1: string, _col2: string type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction order by: _col1 - partition by: '1' + partition by: _col2 raw input shape: window functions: window function definition @@ -327,7 +323,7 @@ STAGE PLANS: isPivotResult: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), '1' (type: string), rank_window_0 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/quotedid_partition.q.out ql/src/test/results/clientpositive/quotedid_partition.q.out index e40d0d0..d34a005 100644 --- ql/src/test/results/clientpositive/quotedid_partition.q.out +++ ql/src/test/results/clientpositive/quotedid_partition.q.out @@ -46,11 +46,11 @@ STAGE PLANS: predicate: (x+1 = '10') (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: y&y (type: string) - outputColumnNames: _col1 + expressions: '10' (type: string), y&y (type: string), 'a' (type: string) + outputColumnNames: x+1, y&y, !@#$%^&*()_q Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: '10' (type: string), _col1 (type: string), 'a' (type: string) + keys: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -65,17 +65,13 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '10' (type: string), _col1 (type: string), 'a' (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/skewjoin.q.out ql/src/test/results/clientpositive/skewjoin.q.out index 4e98dfd..a4cf310 100644 --- ql/src/test/results/clientpositive/skewjoin.q.out +++ ql/src/test/results/clientpositive/skewjoin.q.out @@ -984,17 +984,17 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) < 100.0) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -1007,11 +1007,11 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hash(_col0) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -1068,7 +1068,7 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -1146,7 +1146,7 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash diff --git ql/src/test/results/clientpositive/subquery_exists.q.out ql/src/test/results/clientpositive/subquery_exists.q.out index f3a2705..698db03 100644 --- ql/src/test/results/clientpositive/subquery_exists.q.out +++ ql/src/test/results/clientpositive/subquery_exists.q.out @@ -36,7 +36,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (key is not null and (value > 'val_9')) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git 
ql/src/test/results/clientpositive/subquery_in_having.q.out ql/src/test/results/clientpositive/subquery_in_having.q.out index 87c5a62..80bea5e 100644 --- ql/src/test/results/clientpositive/subquery_in_having.q.out +++ ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -280,8 +280,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -303,7 +304,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col2 is not null (type: boolean) + predicate: (_col2 is not null and _col1 is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -507,8 +508,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(p_size), min(p_size) @@ -530,7 +532,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 - _col2) < 20) (type: boolean) + predicate: (((_col1 - _col2) < 20) and _col0 is not null) (type: boolean) Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -607,8 +609,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(p_size), min(p_size) @@ -655,7 +658,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 - _col2) < 20) (type: boolean) + predicate: (((_col1 - _col2) < 20) and _col0 is not null) (type: boolean) Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -1205,21 +1208,25 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(p_size) - keys: p_name (type: string), p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 15 
Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct) + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) @@ -1227,16 +1234,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 7 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Conditional Operator diff --git ql/src/test/results/clientpositive/subquery_notexists.q.out ql/src/test/results/clientpositive/subquery_notexists.q.out index 215d855..70f1677 100644 --- ql/src/test/results/clientpositive/subquery_notexists.q.out +++ ql/src/test/results/clientpositive/subquery_notexists.q.out @@ -275,7 +275,7 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -302,9 +302,9 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator @@ -312,11 +312,11 @@ STAGE PLANS: Left Outer Join0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col3 + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) + predicate: _col2 is null (type: boolean) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/subquery_notexists_having.q.out 
ql/src/test/results/clientpositive/subquery_notexists_having.q.out index 637fc62..65f0a32 100644 --- ql/src/test/results/clientpositive/subquery_notexists_having.q.out +++ ql/src/test/results/clientpositive/subquery_notexists_having.q.out @@ -223,9 +223,9 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator @@ -233,11 +233,11 @@ STAGE PLANS: Left Outer Join0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col3 + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) + predicate: _col2 is null (type: boolean) Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) @@ -278,7 +278,7 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index e34a401..71f7cfc 100644 --- ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -52,7 +52,7 @@ STAGE PLANS: alias: src11 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key1 > '9') and value1 is not null) (type: boolean) + predicate: (value1 is not null and (key1 > '9')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key1 (type: string), value1 (type: string) @@ -122,7 +122,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (value is not null and (key > '9')) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -586,7 +586,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (value is not null and (key > '9')) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -653,8 +653,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator 
aggregations: count()
@@ -676,7 +677,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col2 is not null (type: boolean)
+ predicate: (_col2 is not null and _col1 is not null) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
diff --git ql/src/test/results/clientpositive/subquery_views.q.out ql/src/test/results/clientpositive/subquery_views.q.out
index 76e53d3..5fac572 100644
--- ql/src/test/results/clientpositive/subquery_views.q.out
+++ ql/src/test/results/clientpositive/subquery_views.q.out
@@ -111,8 +111,8 @@ where `b`.`key` not in
from `default`.`src` `a`
where `b`.`value` = `a`.`value` and `a`.`key` = `b`.`key` and `a`.`value` > 'val_11'
), tableType:VIRTUAL_VIEW)
-Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[42][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
PREHOOK: query: explain
select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
@@ -176,17 +176,14 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < '11') (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
TableScan
Reduce Output Operator
sort order:
@@ -199,13 +196,16 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 < '11') (type: boolean)
+ Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -215,22 +215,22 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string)
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (value > 'val_11') (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((value > 'val_11') and (key < '11')) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string), key (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
@@ -239,14 +239,14 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string), _col0 (type: string)
1 _col0 (type: string), _col1 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 201 Data size: 2138 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1063 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1063 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -262,7 +262,7 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1063 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
@@ -278,10 +278,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 110 Data size: 1172 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 110 Data size: 1169 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 110 Data size: 1172 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 110 Data size: 1169 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -420,8 +420,8 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[42][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
PREHOOK: query: select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/udf1.q.out ql/src/test/results/clientpositive/udf1.q.out
index b3b694b..dffbccf 100644
--- ql/src/test/results/clientpositive/udf1.q.out
+++ ql/src/test/results/clientpositive/udf1.q.out
@@ -137,26 +137,26 @@ POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT 'a' LIKE '%a%', 'b
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
-POSTHOOK: Lineage: dest1.c1 SIMPLE []
-POSTHOOK: Lineage: dest1.c10 SIMPLE []
-POSTHOOK: Lineage: dest1.c11 SIMPLE []
-POSTHOOK: Lineage: dest1.c12 SIMPLE []
-POSTHOOK: Lineage: dest1.c13 SIMPLE []
-POSTHOOK: Lineage: dest1.c14 SIMPLE []
-POSTHOOK: Lineage: dest1.c15 SIMPLE []
-POSTHOOK: Lineage: dest1.c16 SIMPLE []
-POSTHOOK: Lineage: dest1.c17 SIMPLE []
-POSTHOOK: Lineage: dest1.c18 SIMPLE []
-POSTHOOK: Lineage: dest1.c19 SIMPLE []
-POSTHOOK: Lineage: dest1.c2 SIMPLE []
-POSTHOOK: Lineage: dest1.c20 SIMPLE []
-POSTHOOK: Lineage: dest1.c3 SIMPLE []
-POSTHOOK: Lineage: dest1.c4 SIMPLE []
-POSTHOOK: Lineage: dest1.c5 SIMPLE []
-POSTHOOK: Lineage: dest1.c6 SIMPLE []
-POSTHOOK: Lineage: dest1.c7 SIMPLE []
-POSTHOOK: Lineage: dest1.c8 SIMPLE []
-POSTHOOK: Lineage: dest1.c9 SIMPLE []
+POSTHOOK: Lineage: dest1.c1 EXPRESSION []
+POSTHOOK: Lineage: dest1.c10 EXPRESSION []
+POSTHOOK: Lineage: dest1.c11 EXPRESSION []
+POSTHOOK: Lineage: dest1.c12 EXPRESSION []
+POSTHOOK: Lineage: dest1.c13 EXPRESSION []
+POSTHOOK: Lineage: dest1.c14 EXPRESSION []
+POSTHOOK: Lineage: dest1.c15 EXPRESSION []
+POSTHOOK: Lineage: dest1.c16 EXPRESSION []
+POSTHOOK: Lineage: dest1.c17 EXPRESSION []
+POSTHOOK: Lineage: dest1.c18 EXPRESSION []
+POSTHOOK: Lineage: dest1.c19 EXPRESSION []
+POSTHOOK: Lineage: dest1.c2 EXPRESSION []
+POSTHOOK: Lineage: dest1.c20 EXPRESSION []
+POSTHOOK: Lineage: dest1.c3 EXPRESSION []
+POSTHOOK: Lineage: dest1.c4 EXPRESSION []
+POSTHOOK: Lineage: dest1.c5 EXPRESSION []
+POSTHOOK: Lineage: dest1.c6 EXPRESSION []
+POSTHOOK: Lineage: dest1.c7 EXPRESSION []
+POSTHOOK: Lineage: dest1.c8 EXPRESSION []
+POSTHOOK: Lineage: dest1.c9 EXPRESSION []
PREHOOK: query: SELECT dest1.* FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
diff --git ql/src/test/results/clientpositive/udf_10_trims.q.out ql/src/test/results/clientpositive/udf_10_trims.q.out
index 3a5303a..2f79723 100644
--- ql/src/test/results/clientpositive/udf_10_trims.q.out
+++ ql/src/test/results/clientpositive/udf_10_trims.q.out
@@ -117,4 +117,4 @@ WHERE src.key = 86
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
-POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest1.c1 EXPRESSION []
diff --git ql/src/test/results/clientpositive/udf_folder_constants.q.out ql/src/test/results/clientpositive/udf_folder_constants.q.out
index ef07420..3830daf 100644
--- ql/src/test/results/clientpositive/udf_folder_constants.q.out
+++ ql/src/test/results/clientpositive/udf_folder_constants.q.out
@@ -63,12 +63,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: month (type: int)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: int)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
TableScan
alias: b
@@ -90,7 +90,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
+ 0 _col1 (type: int)
1 _col0 (type: int)
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
diff --git ql/src/test/results/clientpositive/udf_unix_timestamp.q.out ql/src/test/results/clientpositive/udf_unix_timestamp.q.out
index c64379d..1a22000 100644
--- ql/src/test/results/clientpositive/udf_unix_timestamp.q.out
+++ ql/src/test/results/clientpositive/udf_unix_timestamp.q.out
@@ -72,6 +72,7 @@ POSTHOOK: Input: default@oneline
2009 Mar 20 11:30:01 am 1237573801
unix_timestamp(void) is deprecated. Use current_timestamp instead.
unix_timestamp(void) is deprecated. Use current_timestamp instead.
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
PREHOOK: query: create table foo as
SELECT 'deprecated' as a, unix_timestamp() as b
diff --git ql/src/test/results/clientpositive/union_remove_25.q.out ql/src/test/results/clientpositive/union_remove_25.q.out
index d82fcfc..c98d4c8 100644
--- ql/src/test/results/clientpositive/union_remove_25.q.out
+++ ql/src/test/results/clientpositive/union_remove_25.q.out
@@ -467,7 +467,7 @@ STAGE PLANS:
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 1000
@@ -476,17 +476,17 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col3 (type: string)
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 1000
Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string)
+ expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -519,7 +519,7 @@ STAGE PLANS:
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 1000
@@ -528,17 +528,17 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col3 (type: string)
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 1000
Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string)
+ expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git ql/src/test/results/clientpositive/union_view.q.out ql/src/test/results/clientpositive/union_view.q.out
index 1d93159..66ca51b 100644
--- ql/src/test/results/clientpositive/union_view.q.out
+++ ql/src/test/results/clientpositive/union_view.q.out
@@ -358,12 +358,12 @@ STAGE PLANS:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Union
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '1' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -382,12 +382,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Union
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '1' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -406,12 +406,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Union
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '1' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -471,12 +471,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Union
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '2' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '2' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -495,12 +495,12 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Union
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '2' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '2' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -519,12 +519,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Union
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '2' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '2' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -584,12 +584,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Union
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '3' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '3' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -608,12 +608,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Union
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '3' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '3' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -632,12 +632,12 @@
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Union
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '3' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '3' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 502 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -701,12 +701,12 @@ STAGE PLANS:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string), ds (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Union
Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
+ expressions: _col1 (type: string), _col2 (type: string)
outputColumnNames: _col1, _col2
Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -723,12 +723,12 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string), ds (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Union
Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
+ expressions: _col1 (type: string), _col2 (type: string)
outputColumnNames: _col1, _col2
Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -745,12 +745,12 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string), ds (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Union
Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
+ expressions: _col1 (type: string), _col2 (type: string)
outputColumnNames: _col1, _col2
Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -1226,12 +1226,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Union
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '4' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '4' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1250,12 +1250,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Union
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '4' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '4' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1274,12 +1274,12 @@ STAGE PLANS:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Union
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 86 (type: int), _col0 (type: string), '4' (type: string)
+ expressions: 86 (type: int), _col1 (type: string), '4' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 252 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git ql/src/test/results/clientpositive/vector_decimal_round.q.out ql/src/test/results/clientpositive/vector_decimal_round.q.out
index ec6226e..25e5cfa 100644
--- ql/src/test/results/clientpositive/vector_decimal_round.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_round.q.out
@@ -106,7 +106,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: round(_col0, -1) (type: decimal(11,0))
+ key expressions: round(_col0, (- 1)) (type: decimal(11,0))
sort order: +
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: decimal(10,0))
@@ -242,7 +242,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: round(_col0, -1) (type: decimal(11,0))
+ key expressions: round(_col0, (- 1)) (type: decimal(11,0))
sort order: +
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: decimal(10,0))
@@ -379,7 +379,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: round(_col0, -1) (type: decimal(11,0))
+ key expressions: round(_col0, (- 1)) (type: decimal(11,0))
sort order: +
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: decimal(10,0))
diff --git ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
index dd40f28..dd52c03 100644
--- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
+++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
@@ -466,7 +466,7 @@ STAGE PLANS:
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
+ predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean)
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_orderkey (type: int)