diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java
new file mode 100644
index 0000000..bbf518d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate;
+import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+
+public class HiveRexExecutorImpl implements RelOptPlanner.Executor {
+
+  private final RelOptCluster cluster;
+
+  public HiveRexExecutorImpl(RelOptCluster cluster) {
+    this.cluster = cluster;
+  }
+
+  @Override
+  public void reduce(RexBuilder rexBuilder, List<RexNode> constExps, List<RexNode> reducedValues) {
+    RexNodeConverter rexNodeConverter = new RexNodeConverter(cluster);
+    for (RexNode rexNode : constExps) {
+      // initialize the converter
+      ExprNodeConverter converter = new ExprNodeConverter("", null, null, null,
+          new HashSet<Integer>(), cluster.getTypeFactory());
+      // convert RexNode to ExprNodeGenericFuncDesc
+      ExprNodeDesc expr = rexNode.accept(converter);
+      if (expr instanceof ExprNodeGenericFuncDesc) {
+        // fold the constant
+        ExprNodeDesc constant = ConstantPropagateProcFactory
+            .foldExpr((ExprNodeGenericFuncDesc) expr);
+        if (constant != null) {
+          try {
+            // convert the folded constant back to a RexNode
+            reducedValues.add(rexNodeConverter.convert((ExprNodeConstantDesc) constant));
+          } catch (Exception e) {
+            e.printStackTrace();
+            reducedValues.add(rexNode);
+          }
+        } else {
+          reducedValues.add(rexNode);
+        }
+      } else {
+        reducedValues.add(rexNode);
+      }
+    }
+  }
+
+}
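The executor above only takes effect once it is registered with the planner; reduceExpressions() in the rule below returns false when getExecutor() is null. A minimal wiring sketch in Java (the cluster handle is assumed to be available during plan generation; this mirrors the CalcitePlanner change at the end of this patch):

    RelOptCluster cluster = ...;  // obtained while building the Calcite plan
    RelOptPlanner.Executor executor = new HiveRexExecutorImpl(cluster);
    cluster.getPlanner().setExecutor(executor);  // enables the constant-reduction rules
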
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
new file mode 100644
index 0000000..17670de
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
@@ -0,0 +1,853 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelOptPredicateList;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.EquiJoin;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinInfo;
+import org.apache.calcite.rel.logical.LogicalCalc;
+import org.apache.calcite.rel.logical.LogicalValues;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexCorrelVariable;
+import org.apache.calcite.rex.RexDynamicParam;
+import org.apache.calcite.rex.RexFieldAccess;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexLocalRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.rex.RexProgram;
+import org.apache.calcite.rex.RexProgramBuilder;
+import org.apache.calcite.rex.RexRangeRef;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.fun.SqlRowOperator;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.util.Pair;
+import org.apache.calcite.util.Stacks;
+import org.apache.calcite.util.Util;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * Collection of planner rules that apply various simplifying transformations on
+ * RexNode trees. Currently, there are two transformations:
+ *
+ * <ul>
+ *
+ * <li>Constant reduction, which evaluates constant subtrees, replacing them
+ * with a corresponding RexLiteral
+ *
+ * <li>Removal of redundant casts, which occurs when the argument into the cast
+ * is the same as the type of the resulting cast expression
+ * </ul>
+ */
+public abstract class HiveReduceExpressionsRule extends RelOptRule {
+
+  //~ Static fields/initializers ---------------------------------------------
+
+  /**
+   * Regular expression that matches the description of all instances of this
+   * rule and {@link ValuesReduceRule} also. Use it to prevent the planner from
+   * invoking these rules.
+   */
+  public static final Pattern EXCLUSION_PATTERN =
+      Pattern.compile("Reduce(Expressions|Values)Rule.*");
+
+  /**
+   * Singleton rule that reduces constants inside a
+   * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter}.
+   * If the condition is a constant, the filter is removed (if TRUE) or replaced
+   * with an empty {@link org.apache.calcite.rel.core.Values} (if FALSE or NULL).
+   */
+  public static final HiveReduceExpressionsRule FILTER_INSTANCE =
+      new HiveReduceExpressionsRule(HiveFilter.class,
+          "HiveReduceExpressionsRule(Filter)") {
+    public void onMatch(RelOptRuleCall call) {
+      final HiveFilter filter = call.rel(0);
+      final List<RexNode> expList = Lists.newArrayList(filter.getCondition());
+      RexNode newConditionExp;
+      boolean reduced;
+      final RelOptPredicateList predicates =
+          RelMetadataQuery.getPulledUpPredicates(filter.getInput());
+      if (reduceExpressions(filter, expList, predicates)) {
+        assert expList.size() == 1;
+        newConditionExp = expList.get(0);
+        reduced = true;
+      } else {
+        // No reduction, but let's still test the original
+        // predicate to see if it was already a constant,
+        // in which case we don't need any runtime decision
+        // about filtering.
+        newConditionExp = filter.getCondition();
+        reduced = false;
+      }
+      if (newConditionExp.isAlwaysTrue()) {
+        call.transformTo(filter.getInput());
+      } else if (newConditionExp instanceof RexLiteral
+          || RexUtil.isNullLiteral(newConditionExp, true)) {
+        //TODO: similar to null scan optimization
+        //call.transformTo(LogicalValues.createEmpty(filter.getCluster(), filter.getRowType()));
+        return;
+      } else if (reduced) {
+        call.transformTo(RelOptUtil.createFilter(filter.getInput(), expList.get(0)));
+      } else {
+        if (newConditionExp instanceof RexCall) {
+          RexCall rexCall = (RexCall) newConditionExp;
+          boolean reverse = rexCall.getOperator() == SqlStdOperatorTable.NOT;
+          if (reverse) {
+            rexCall = (RexCall) rexCall.getOperands().get(0);
+          }
+          reduceNotNullableFilter(call, filter, rexCall, reverse);
+        }
+        return;
+      }
+
+      // New plan is absolutely better than old plan.
+      call.getPlanner().setImportance(filter, 0.0);
+    }
+
+    private void reduceNotNullableFilter(RelOptRuleCall call, HiveFilter filter,
+        RexCall rexCall, boolean reverse) {
+      // If the expression is an IS [NOT] NULL on a non-nullable
+      // column, then we can either remove the filter or replace
+      // it with an Empty.
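+      // For example, given a hypothetical column declared "x INT NOT NULL":
+      //   WHERE x IS NOT NULL  =>  always true, so the filter is dropped;
+      //   WHERE x IS NULL      =>  always false, so the plan becomes empty Values.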
+ boolean alwaysTrue; + switch (rexCall.getKind()) { + case IS_NULL: + case IS_UNKNOWN: + alwaysTrue = false; + break; + case IS_NOT_NULL: + alwaysTrue = true; + break; + default: + return; + } + if (reverse) { + alwaysTrue = !alwaysTrue; + } + RexNode operand = rexCall.getOperands().get(0); + if (operand instanceof RexInputRef) { + RexInputRef inputRef = (RexInputRef) operand; + if (!inputRef.getType().isNullable()) { + if (alwaysTrue) { + call.transformTo(filter.getInput()); + } else { + call.transformTo(LogicalValues.createEmpty(filter.getCluster(), filter.getRowType())); + } + } + } + } + }; + + public static final HiveReduceExpressionsRule PROJECT_INSTANCE = new HiveReduceExpressionsRule( + HiveProject.class, "HiveReduceExpressionsRule(Project)") { + + public boolean matches(RelOptRuleCall call) { + HiveProject project = call.rel(0); + HiveRulesRegistry registry = call.getPlanner(). + getContext().unwrap(HiveRulesRegistry.class); + + // If this operator has been visited already by the rule, + // we do not need to apply the optimization + if (registry != null && registry.getVisited(this).contains(project)) { + return false; + } + + return true; + } + + public void onMatch(RelOptRuleCall call) { + HiveProject project = call.rel(0); + // 0. Register that we have visited this operator in this rule + HiveRulesRegistry registry = call.getPlanner(). + getContext().unwrap(HiveRulesRegistry.class); + if (registry != null) { + registry.registerVisited(this, project); + } + final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(project + .getInput()); + final List expList = Lists.newArrayList(project.getProjects()); + if (reduceExpressions(project, expList, predicates)) { + HiveProject newProject = HiveProject.create(project.getCluster(), project.getInput(), expList, + project.getRowType(), project.getCollationList()); + if (registry != null) { + registry.registerVisited(this, newProject); + } + call.transformTo(newProject); + // New plan is absolutely better than old plan. + call.getPlanner().setImportance(project, 0.0); + } + } + }; + + public static final HiveReduceExpressionsRule JOIN_INSTANCE = new HiveReduceExpressionsRule( + Join.class, "HiveReduceExpressionsRule(Join)") { + public void onMatch(RelOptRuleCall call) { + final Join join = call.rel(0); + final List expList = Lists.newArrayList(join.getCondition()); + final int fieldCount = join.getLeft().getRowType().getFieldCount(); + final RelOptPredicateList leftPredicates = RelMetadataQuery.getPulledUpPredicates(join + .getLeft()); + final RelOptPredicateList rightPredicates = RelMetadataQuery.getPulledUpPredicates(join + .getRight()); + final RelOptPredicateList predicates = leftPredicates + .union(rightPredicates.shift(fieldCount)); + if (!reduceExpressions(join, expList, predicates)) { + return; + } + if (join instanceof EquiJoin) { + final JoinInfo joinInfo = JoinInfo.of(join.getLeft(), join.getRight(), expList.get(0)); + if (!joinInfo.isEqui()) { + // This kind of join must be an equi-join, and the condition is + // no longer an equi-join. SemiJoin is an example of this. + return; + } + } + call.transformTo(join.copy(join.getTraitSet(), expList.get(0), join.getLeft(), + join.getRight(), join.getJoinType(), join.isSemiJoinDone())); + + // New plan is absolutely better than old plan. 
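+        // Hint to the planner: importance 0 marks the old (unreduced) join
+        // expression as not worth optimizing further.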
+ call.getPlanner().setImportance(join, 0.0); + } + }; + + public static final HiveReduceExpressionsRule CALC_INSTANCE = new HiveReduceExpressionsRule( + LogicalCalc.class, "HiveReduceExpressionsRule(Calc)") { + public void onMatch(RelOptRuleCall call) { + LogicalCalc calc = call.rel(0); + RexProgram program = calc.getProgram(); + final List exprList = program.getExprList(); + + // Form a list of expressions with sub-expressions fully expanded. + final List expandedExprList = Lists.newArrayList(); + final RexShuttle shuttle = new RexShuttle() { + public RexNode visitLocalRef(RexLocalRef localRef) { + return expandedExprList.get(localRef.getIndex()); + } + }; + for (RexNode expr : exprList) { + expandedExprList.add(expr.accept(shuttle)); + } + final RelOptPredicateList predicates = RelOptPredicateList.EMPTY; + if (reduceExpressions(calc, expandedExprList, predicates)) { + final RexProgramBuilder builder = new RexProgramBuilder(calc.getInput().getRowType(), calc + .getCluster().getRexBuilder()); + final List list = Lists.newArrayList(); + for (RexNode expr : expandedExprList) { + list.add(builder.registerInput(expr)); + } + if (program.getCondition() != null) { + final int conditionIndex = program.getCondition().getIndex(); + final RexNode newConditionExp = expandedExprList.get(conditionIndex); + if (newConditionExp.isAlwaysTrue()) { + // condition is always TRUE - drop it + } else if (newConditionExp instanceof RexLiteral + || RexUtil.isNullLiteral(newConditionExp, true)) { + // condition is always NULL or FALSE - replace calc + // with empty + call.transformTo(LogicalValues.createEmpty(calc.getCluster(), calc.getRowType())); + return; + } else { + builder.addCondition(list.get(conditionIndex)); + } + } + int k = 0; + for (RexLocalRef projectExpr : program.getProjectList()) { + final int index = projectExpr.getIndex(); + builder.addProject(list.get(index).getIndex(), program.getOutputRowType().getFieldNames() + .get(k++)); + } + call.transformTo(LogicalCalc.create(calc.getInput(), builder.getProgram())); + + // New plan is absolutely better than old plan. + call.getPlanner().setImportance(calc, 0.0); + } + } + }; + + // ~ Constructors ----------------------------------------------------------- + + /** + * Creates a HiveReduceExpressionsRule. + * + * @param clazz + * class of rels to which this rule should apply + */ + private HiveReduceExpressionsRule(Class clazz, String desc) { + super(operand(clazz, any()), desc); + } + + // ~ Methods ---------------------------------------------------------------- + + /** + * Reduces a list of expressions. + * + * @param rel + * Relational expression + * @param expList + * List of expressions, modified in place + * @param predicates + * Constraints known to hold on input expressions + * @return whether reduction found something to change, and succeeded + */ + static boolean reduceExpressions(RelNode rel, List expList, + RelOptPredicateList predicates) { + RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); + + // Replace predicates on CASE to CASE on predicates. + for (int i = 0; i < expList.size(); i++) { + RexNode exp = expList.get(i); + if (exp instanceof RexCall) { + RexNode exp2 = pushPredicateIntoCase((RexCall) exp); + if (exp2 != exp) { + expList.set(i, exp2); + } + } + } + + // Find reducible expressions. 
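+    // Three outputs (see findReducibleExps): constExps are maximal constant
+    // subtrees; addCasts flags, per expression, whether the reduced literal
+    // must be re-cast to preserve the original type; removableCasts are casts
+    // whose operand already has the cast's target type.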
+    final List<RexNode> constExps = Lists.newArrayList();
+    List<Boolean> addCasts = Lists.newArrayList();
+    final List<RexNode> removableCasts = Lists.newArrayList();
+    final ImmutableMap<RexNode, RexLiteral> constants = predicateConstants(predicates);
+    findReducibleExps(rel.getCluster().getTypeFactory(), expList, constants,
+        constExps, addCasts, removableCasts);
+    if (constExps.isEmpty() && removableCasts.isEmpty()) {
+      return false;
+    }
+
+    // Remove redundant casts before reducing constant expressions.
+    // If the argument to the redundant cast is a reducible constant,
+    // reducing that argument to a constant first will result in not being
+    // able to locate the original cast expression.
+    if (!removableCasts.isEmpty()) {
+      final List<RexNode> reducedExprs = Lists.newArrayList();
+      for (RexNode exp : removableCasts) {
+        RexCall call = (RexCall) exp;
+        reducedExprs.add(call.getOperands().get(0));
+      }
+      RexReplacer replacer = new RexReplacer(rexBuilder, removableCasts,
+          reducedExprs, Collections.nCopies(removableCasts.size(), false));
+      replacer.mutate(expList);
+    }
+
+    if (constExps.isEmpty()) {
+      return true;
+    }
+
+    final List<RexNode> constExps2 = Lists.newArrayList(constExps);
+    if (!constants.isEmpty()) {
+      // noinspection unchecked
+      final List<Map.Entry<RexNode, RexNode>> pairs =
+          (List<Map.Entry<RexNode, RexNode>>) (List) Lists.newArrayList(constants.entrySet());
+      RexReplacer replacer = new RexReplacer(rexBuilder, Pair.left(pairs),
+          Pair.right(pairs), Collections.nCopies(pairs.size(), false));
+      replacer.mutate(constExps2);
+    }
+
+    // Compute the values they reduce to.
+    RelOptPlanner.Executor executor = rel.getCluster().getPlanner().getExecutor();
+    if (executor == null) {
+      // Cannot reduce expressions: caller has not set an executor in their
+      // environment. Caller should execute something like the following before
+      // invoking the planner:
+      //
+      // final RexExecutorImpl executor =
+      //   new RexExecutorImpl(Schemas.createDataContext(null));
+      // rootRel.getCluster().getPlanner().setExecutor(executor);
+      return false;
+    }
+
+    final List<RexNode> reducedValues = Lists.newArrayList();
+    executor.reduce(rexBuilder, constExps2, reducedValues);
+
+    // For Project, we have to be sure to preserve the result
+    // types, so always cast regardless of the expression type.
+    // For other RelNodes like Filter, in general, this isn't necessary,
+    // and the presence of casts could hinder other rules such as sarg
+    // analysis, which require bare literals. But there are special cases,
+    // like when the expression is a UDR argument, that need to be
+    // handled as special cases.
+    if (rel instanceof HiveProject) {
+      addCasts = Collections.nCopies(reducedValues.size(), true);
+    }
+
+    RexReplacer replacer = new RexReplacer(rexBuilder, constExps, reducedValues, addCasts);
+    replacer.mutate(expList);
+    return true;
+  }
+
+  /**
+   * Locates expressions that can be reduced to literals or converted to
+   * expressions with redundant casts removed.
+   *
+   * @param typeFactory
+   *          Type factory
+   * @param exps
+   *          list of candidate expressions to be examined for reduction
+   * @param constants
+   *          List of expressions known to be constant
+   * @param constExps
+   *          returns the list of expressions that can be constant reduced
+   * @param addCasts
+   *          indicator for each expression that can be constant reduced,
+   *          whether a cast of the resulting reduced expression is potentially
+   *          necessary
+   * @param removableCasts
+   *          returns the list of cast expressions where the cast can be
+   *          removed
+   */
+  private static void findReducibleExps(RelDataTypeFactory typeFactory,
+      List<RexNode> exps, ImmutableMap<RexNode, RexLiteral> constants,
+      List<RexNode> constExps, List<Boolean> addCasts, List<RexNode> removableCasts) {
+    ReducibleExprLocator gardener = new ReducibleExprLocator(typeFactory,
+        constants, constExps, addCasts, removableCasts);
+    for (RexNode exp : exps) {
+      gardener.analyze(exp);
+    }
+    assert constExps.size() == addCasts.size();
+  }
+
+  private static ImmutableMap<RexNode, RexLiteral> predicateConstants(
+      RelOptPredicateList predicates) {
+    // We cannot use an ImmutableMap.Builder here. If there are multiple entries
+    // with the same key (e.g. "WHERE deptno = 1 AND deptno = 2"), it doesn't
+    // matter which we take, so the latter will replace the former.
+    // The basic idea is to find all the pairs of RexNode = RexLiteral
+    // (1) If 'predicates' contain a non-EQUALS, we bail out.
+    // (2) It is OK if a RexNode is equal to the same RexLiteral several times,
+    // (e.g. "WHERE deptno = 1 AND deptno = 1")
+    // (3) It will return an empty map if there are inconsistent constraints
+    // (e.g. "WHERE deptno = 1 AND deptno = 2")
+    Map<RexNode, RexLiteral> builder = Maps.newHashMap();
+    boolean findUsefulConstants = true;
+    for (RexNode predicate : predicates.pulledUpPredicates) {
+      if (predicate.getKind().equals(SqlKind.EQUALS)) {
+        final List<RexNode> operands = ((RexCall) predicate).getOperands();
+        if (operands.size() != 2) {
+          findUsefulConstants = false;
+          break;
+        } else {
+          if (operands.get(1) instanceof RexLiteral) {
+            RexLiteral literal = builder.get(operands.get(0));
+            if (literal == null) {
+              builder.put(operands.get(0), (RexLiteral) operands.get(1));
+            } else {
+              RexLiteral newLiteral = (RexLiteral) operands.get(1);
+              if (!literal.getValue().equals(newLiteral.getValue())) {
+                // inconsistent constraints: return an empty map instead;
+                // we bail out, the reduce filter expression rule should be able
+                // to deal with this.
+                findUsefulConstants = false;
+                break;
+              }
+            }
+          } else if (operands.get(0) instanceof RexLiteral) {
+            RexLiteral literal = builder.get(operands.get(1));
+            if (literal == null) {
+              builder.put(operands.get(1), (RexLiteral) operands.get(0));
+            } else {
+              RexLiteral newLiteral = (RexLiteral) operands.get(0);
+              if (!literal.getValue().equals(newLiteral.getValue())) {
+                // inconsistent constraints: return an empty map instead;
+                // we bail out, the reduce filter expression rule should be able
+                // to deal with this.
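+                // Example: predicates {deptno = 1, deptno = 2} reach this point
+                // with literal 1 vs. newLiteral 2, so no single constant value
+                // for deptno can be trusted.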
+ findUsefulConstants = false; + break; + } + } + } else { + findUsefulConstants = false; + break; + } + } + } else { + findUsefulConstants = false; + } + } + if (!findUsefulConstants) { + builder = Maps.newHashMap(); + } + return ImmutableMap.copyOf(builder); + } + + private static RexCall pushPredicateIntoCase(RexCall call) { + if (call.getType().getSqlTypeName() != SqlTypeName.BOOLEAN) { + return call; + } + int caseOrdinal = -1; + final List operands = call.getOperands(); + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + switch (operand.getKind()) { + case CASE: + caseOrdinal = i; + } + } + if (caseOrdinal < 0) { + return call; + } + // Convert + // f(CASE WHEN p1 THEN v1 ... END, arg) + // to + // CASE WHEN p1 THEN f(v1, arg) ... END + final RexCall case_ = (RexCall) operands.get(caseOrdinal); + final List nodes = new ArrayList<>(); + for (int i = 0; i < case_.getOperands().size(); i++) { + RexNode node = case_.getOperands().get(i); + if (!RexUtil.isCasePredicate(case_, i)) { + node = substitute(call, caseOrdinal, node); + } + nodes.add(node); + } + return case_.clone(call.getType(), nodes); + } + + /** + * Converts op(arg0, ..., argOrdinal, ..., argN) to op(arg0,..., node, ..., + * argN). + */ + private static RexNode substitute(RexCall call, int ordinal, RexNode node) { + final List newOperands = Lists.newArrayList(call.getOperands()); + newOperands.set(ordinal, node); + return call.clone(call.getType(), newOperands); + } + + // ~ Inner Classes ---------------------------------------------------------- + + /** + * Replaces expressions with their reductions. Note that we only have to look + * for RexCall, since nothing else is reducible in the first place. + */ + private static class RexReplacer extends RexShuttle { + private final RexBuilder rexBuilder; + private final List reducibleExps; + private final List reducedValues; + private final List addCasts; + + RexReplacer(RexBuilder rexBuilder, List reducibleExps, List reducedValues, + List addCasts) { + this.rexBuilder = rexBuilder; + this.reducibleExps = reducibleExps; + this.reducedValues = reducedValues; + this.addCasts = addCasts; + } + + @Override + public RexNode visitInputRef(RexInputRef inputRef) { + RexNode node = visit(inputRef); + if (node == null) { + return super.visitInputRef(inputRef); + } + return node; + } + + @Override + public RexNode visitCall(RexCall call) { + RexNode node = visit(call); + if (node != null) { + return node; + } + node = super.visitCall(call); + if (node != call) { + node = RexUtil.simplify(rexBuilder, node); + } + return node; + } + + private RexNode visit(final RexNode call) { + int i = reducibleExps.indexOf(call); + if (i == -1) { + return null; + } + RexNode replacement = reducedValues.get(i); + if (addCasts.get(i) && (replacement.getType() != call.getType())) { + // Handle change from nullable to NOT NULL by claiming + // that the result is still nullable, even though + // we know it isn't. + // + // Also, we cannot reduce CAST('abc' AS VARCHAR(4)) to 'abc'. + // If we make 'abc' of type VARCHAR(4), we may later encounter + // the same expression in a Project's digest where it has + // type VARCHAR(3), and that's wrong. + replacement = rexBuilder.makeAbstractCast(call.getType(), replacement); + } + return replacement; + } + } + + /** + * Helper class used to locate expressions that either can be reduced to + * literals or contain redundant casts. 
+ */ + private static class ReducibleExprLocator extends RexVisitorImpl { + /** + * Whether an expression is constant, and if so, whether it can be reduced + * to a simpler constant. + */ + enum Constancy { + NON_CONSTANT, REDUCIBLE_CONSTANT, IRREDUCIBLE_CONSTANT + } + + private final RelDataTypeFactory typeFactory; + + private final List stack; + + private final ImmutableMap constants; + + private final List constExprs; + + private final List addCasts; + + private final List removableCasts; + + private final List parentCallTypeStack; + + ReducibleExprLocator(RelDataTypeFactory typeFactory, + ImmutableMap constants, List constExprs, + List addCasts, List removableCasts) { + // go deep + super(true); + this.typeFactory = typeFactory; + this.constants = constants; + this.constExprs = constExprs; + this.addCasts = addCasts; + this.removableCasts = removableCasts; + this.stack = Lists.newArrayList(); + this.parentCallTypeStack = Lists.newArrayList(); + } + + public void analyze(RexNode exp) { + assert stack.isEmpty(); + + exp.accept(this); + + // Deal with top of stack + assert stack.size() == 1; + assert parentCallTypeStack.isEmpty(); + Constancy rootConstancy = stack.get(0); + if (rootConstancy == Constancy.REDUCIBLE_CONSTANT) { + // The entire subtree was constant, so add it to the result. + addResult(exp); + } + stack.clear(); + } + + private Void pushVariable() { + stack.add(Constancy.NON_CONSTANT); + return null; + } + + private void addResult(RexNode exp) { + // Cast of literal can't be reduced, so skip those (otherwise we'd + // go into an infinite loop as we add them back). + if (exp.getKind() == SqlKind.CAST) { + RexCall cast = (RexCall) exp; + RexNode operand = cast.getOperands().get(0); + if (operand instanceof RexLiteral) { + return; + } + } + constExprs.add(exp); + + // In the case where the expression corresponds to a UDR argument, + // we need to preserve casts. Note that this only applies to + // the topmost argument, not expressions nested within the UDR + // call. + // + // REVIEW zfong 6/13/08 - Are there other expressions where we + // also need to preserve casts? 
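+      // Only the top of parentCallTypeStack matters here: it is the operator
+      // whose direct argument this constant is, e.g. for f(1 + 2) we check f,
+      // not the nested +.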
+ if (parentCallTypeStack.isEmpty()) { + addCasts.add(false); + } else { + addCasts.add(isUdf(Stacks.peek(parentCallTypeStack))); + } + } + + private Boolean isUdf(SqlOperator operator) { + // return operator instanceof UserDefinedRoutine + return false; + } + + public Void visitInputRef(RexInputRef inputRef) { + if (constants.containsKey(inputRef)) { + stack.add(Constancy.REDUCIBLE_CONSTANT); + return null; + } + return pushVariable(); + } + + public Void visitLiteral(RexLiteral literal) { + stack.add(Constancy.IRREDUCIBLE_CONSTANT); + return null; + } + + public Void visitOver(RexOver over) { + // assume non-constant (running SUM(1) looks constant but isn't) + analyzeCall(over, Constancy.NON_CONSTANT); + return null; + } + + public Void visitCorrelVariable(RexCorrelVariable correlVariable) { + return pushVariable(); + } + + public Void visitCall(RexCall call) { + // assume REDUCIBLE_CONSTANT until proven otherwise + analyzeCall(call, Constancy.REDUCIBLE_CONSTANT); + return null; + } + + private void analyzeCall(RexCall call, Constancy callConstancy) { + Stacks.push(parentCallTypeStack, call.getOperator()); + + // visit operands, pushing their states onto stack + super.visitCall(call); + + // look for NON_CONSTANT operands + int operandCount = call.getOperands().size(); + List operandStack = Util.last(stack, operandCount); + for (Constancy operandConstancy : operandStack) { + if (operandConstancy == Constancy.NON_CONSTANT) { + callConstancy = Constancy.NON_CONSTANT; + } + } + + // Even if all operands are constant, the call itself may + // be non-deterministic. + if (!call.getOperator().isDeterministic()) { + callConstancy = Constancy.NON_CONSTANT; + } else if (call.getOperator().isDynamicFunction()) { + // We can reduce the call to a constant, but we can't + // cache the plan if the function is dynamic. + // For now, treat it same as non-deterministic. + callConstancy = Constancy.NON_CONSTANT; + } + + // Row operator itself can't be reduced to a literal, but if + // the operands are constants, we still want to reduce those + if ((callConstancy == Constancy.REDUCIBLE_CONSTANT) + && (call.getOperator() instanceof SqlRowOperator)) { + callConstancy = Constancy.NON_CONSTANT; + } + + if (callConstancy == Constancy.NON_CONSTANT) { + // any REDUCIBLE_CONSTANT children are now known to be maximal + // reducible subtrees, so they can be added to the result + // list + for (int iOperand = 0; iOperand < operandCount; ++iOperand) { + Constancy constancy = operandStack.get(iOperand); + if (constancy == Constancy.REDUCIBLE_CONSTANT) { + addResult(call.getOperands().get(iOperand)); + } + } + + // if this cast expression can't be reduced to a literal, + // then see if we can remove the cast + if (call.getOperator() == SqlStdOperatorTable.CAST) { + reduceCasts(call); + } + } + + // pop operands off of the stack + operandStack.clear(); + + // pop this parent call operator off the stack + Stacks.pop(parentCallTypeStack, call.getOperator()); + + // push constancy result for this call onto stack + stack.add(callConstancy); + } + + private void reduceCasts(RexCall outerCast) { + List operands = outerCast.getOperands(); + if (operands.size() != 1) { + return; + } + RelDataType outerCastType = outerCast.getType(); + RelDataType operandType = operands.get(0).getType(); + if (operandType.equals(outerCastType)) { + removableCasts.add(outerCast); + return; + } + + // See if the reduction + // CAST((CAST x AS type) AS type NOT NULL) + // -> CAST(x AS type NOT NULL) + // applies. 
TODO jvs 15-Dec-2008: consider + // similar cases for precision changes. + if (!(operands.get(0) instanceof RexCall)) { + return; + } + RexCall innerCast = (RexCall) operands.get(0); + if (innerCast.getOperator() != SqlStdOperatorTable.CAST) { + return; + } + if (innerCast.getOperands().size() != 1) { + return; + } + RelDataType outerTypeNullable = typeFactory.createTypeWithNullability(outerCastType, true); + RelDataType innerTypeNullable = typeFactory.createTypeWithNullability(operandType, true); + if (outerTypeNullable != innerTypeNullable) { + return; + } + if (operandType.isNullable()) { + removableCasts.add(innerCast); + } + } + + public Void visitDynamicParam(RexDynamicParam dynamicParam) { + return pushVariable(); + } + + public Void visitRangeRef(RexRangeRef rangeRef) { + return pushVariable(); + } + + public Void visitFieldAccess(RexFieldAccess fieldAccess) { + return pushVariable(); + } + } + +} + +// End HiveReduceExpressionsRule.java + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java index 1f5d919..1428ab1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java @@ -150,8 +150,9 @@ static ASTNode literal(RexLiteral literal, boolean useTypeQualInLiteral) { switch (sqlType) { case BINARY: ByteString bs = (ByteString) literal.getValue(); - val = bs.byteAt(0); - type = HiveParser.BigintLiteral; + val = bs.toString(); + val = "'" + val + "'"; + type = HiveParser.StringLiteral; break; case TINYINT: if (useTypeQualInLiteral) { @@ -217,7 +218,7 @@ static ASTNode literal(RexLiteral literal, boolean useTypeQualInLiteral) { case TIMESTAMP: { val = literal.getValue(); type = HiveParser.TOK_TIMESTAMPLITERAL; - DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); val = df.format(((Calendar) val).getTime()); val = "'" + val + "'"; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index 14946b3..6fa7340 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -544,7 +544,7 @@ public ASTNode visitCall(RexCall call) { SqlOperator op = call.getOperator(); List astNodeLst = new LinkedList(); if (op.kind == SqlKind.CAST) { - HiveToken ht = TypeConverter.hiveToken(call.getType()); + HiveToken ht = TypeConverter.hiveToken(call); ASTBuilder astBldr = ASTBuilder.construct(ht.type, ht.text); if (ht.args != null) { for (String castArg : ht.args) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 3d05161..1136dda 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.Map; +import org.apache.calcite.avatica.util.ByteString; import org.apache.calcite.avatica.util.TimeUnit; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.RelNode; @@ -109,6 
+110,10 @@ private InputCtx(RelDataType calciteInpDataType, ImmutableMap h private final ImmutableList inputCtxs; private final boolean flattenExpr; + public RexNodeConverter(RelOptCluster cluster) { + this(cluster, new ArrayList(), false); + } + public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, ImmutableMap nameToPosMap, int offset, boolean flattenExpr) { this.cluster = cluster; @@ -314,6 +319,10 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx coi); RexNode calciteLiteral = null; + if (value == null) { + return cluster.getRexBuilder().makeLiteral(null, + cluster.getTypeFactory().createSqlType(SqlTypeName.NULL), true); + } // TODO: Verify if we need to use ConstantObjectInspector to unwrap data switch (hiveTypeCategory) { case BOOLEAN: @@ -423,6 +432,8 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx cluster.getTypeFactory().createSqlType(SqlTypeName.NULL), true); break; case BINARY: + calciteLiteral = cluster.getRexBuilder().makeBinaryLiteral(new ByteString((byte[]) value)); + break; case UNKNOWN: default: throw new RuntimeException("UnSupported Literal"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java index 2825f77..eeeedf8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java @@ -28,6 +28,7 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.type.SqlTypeName; @@ -321,9 +322,9 @@ public static TypeInfo convertPrimitiveType(RelDataType rType) { } /*********************** Convert Calcite Types To Hive Types ***********************/ - public static HiveToken hiveToken(RelDataType calciteType) { + public static HiveToken hiveToken(RexCall call) { HiveToken ht = null; - + RelDataType calciteType = call.getType(); switch (calciteType.getSqlTypeName()) { case CHAR: { ht = new HiveToken(HiveParser.TOK_CHAR, "TOK_CHAR", String.valueOf(calciteType.getPrecision())); @@ -343,6 +344,14 @@ public static HiveToken hiveToken(RelDataType calciteType) { .getPrecision()), String.valueOf(calciteType.getScale())); } break; + case BINARY: { + if (call.getOperands().get(0).getType().getSqlTypeName().getName().equals("BINARY")) { + ht = new HiveToken(HiveParser.Identifier, "unhex"); + } else { + ht = new HiveToken(HiveParser.Identifier, "binary"); + } + } + break; default: ht = calciteToHiveTypeNameMap.get(calciteType.getSqlTypeName().getName()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index e13356c..d58c6b5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -38,6 +38,7 @@ import org.antlr.runtime.tree.TreeVisitorAction; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelOptPlanner.Executor; import org.apache.calcite.plan.RelOptQuery; import org.apache.calcite.plan.RelOptRule; import 
org.apache.calcite.plan.RelOptSchema; @@ -69,7 +70,6 @@ import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; import org.apache.calcite.rel.rules.ProjectMergeRule; import org.apache.calcite.rel.rules.ProjectRemoveRule; -import org.apache.calcite.rel.rules.ReduceExpressionsRule; import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; @@ -119,6 +119,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveHepPlannerContext; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveVolcanoPlannerContext; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; @@ -150,6 +151,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; @@ -839,10 +841,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // Create MD provider HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf); + // Create executor + Executor executorProvider = new HiveRexExecutorImpl(cluster); // 2. Apply Pre Join Order optimizations calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, - mdProvider.getMetadataProvider()); + mdProvider.getMetadataProvider(), executorProvider); // 3. Appy Join Order Optimizations using Hep Planner (MST Algorithm) List list = Lists.newArrayList(); @@ -856,9 +860,6 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY)); - hepPgmBldr.addRuleInstance(ReduceExpressionsRule.JOIN_INSTANCE); - hepPgmBldr.addRuleInstance(ReduceExpressionsRule.FILTER_INSTANCE); - hepPgmBldr.addRuleInstance(ReduceExpressionsRule.PROJECT_INSTANCE); hepPgmBldr.addRuleInstance(ProjectRemoveRule.INSTANCE); hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE); hepPgmBldr.addRuleInstance(new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY)); @@ -936,9 +937,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu * original plan * @param mdProvider * meta data provider + * @param executorProvider + * executor * @return */ - private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { + private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, + RelMetadataProvider mdProvider, Executor executorProvider) { // TODO: Decorelation of subquery should be done before attempting // Partition Pruning; otherwise Expression evaluation may try to execute @@ -964,10 +968,16 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv } // 3. 
Constant propagation, common filter extraction, and PPD - basePlan = hepPlan(basePlan, true, mdProvider, - ReduceExpressionsRule.PROJECT_INSTANCE, - ReduceExpressionsRule.FILTER_INSTANCE, - ReduceExpressionsRule.JOIN_INSTANCE, + // Note: we need to run Projection Pruning for HiveReduceExpressionsRule first. + HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, + HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, + HiveSemiJoin.HIVE_SEMIJOIN_FACTORY, HiveSortLimit.HIVE_SORT_REL_FACTORY, + HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); + basePlan = fieldTrimmer.trim(basePlan); + basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, + HiveReduceExpressionsRule.PROJECT_INSTANCE, + HiveReduceExpressionsRule.FILTER_INSTANCE, + HiveReduceExpressionsRule.JOIN_INSTANCE, HivePreFilteringRule.INSTANCE, new HiveFilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY), @@ -983,10 +993,6 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv new HivePartitionPruneRule(conf)); // 5. Projection Pruning - HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, - HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, - HiveSemiJoin.HIVE_SEMIJOIN_FACTORY, HiveSortLimit.HIVE_SORT_REL_FACTORY, - HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); // 6. Rerun PPD through Project as column pruning would have introduced DT @@ -1009,7 +1015,23 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv */ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, RelMetadataProvider mdProvider, RelOptRule... rules) { - return hepPlan(basePlan, followPlanChanges, mdProvider, + return hepPlan(basePlan, followPlanChanges, mdProvider, null, + HepMatchOrder.TOP_DOWN, rules); + } + + /** + * Run the HEP Planner with the given rule set. + * + * @param basePlan + * @param followPlanChanges + * @param mdProvider + * @param executorProvider + * @param rules + * @return optimized RelNode + */ + private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, + RelMetadataProvider mdProvider, Executor executorProvider, RelOptRule... rules) { + return hepPlan(basePlan, followPlanChanges, mdProvider, executorProvider, HepMatchOrder.TOP_DOWN, rules); } @@ -1023,8 +1045,8 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, * @param rules * @return optimized RelNode */ - private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, RelMetadataProvider mdProvider, - HepMatchOrder order, RelOptRule... rules) { + private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, + RelMetadataProvider mdProvider, HepMatchOrder order, RelOptRule... 
rules) { RelNode optimizedRelNode = basePlan; HepProgramBuilder programBuilder = new HepProgramBuilder(); @@ -1047,10 +1069,52 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, RelMetadata basePlan.getCluster().setMetadataProvider( new CachingRelMetadataProvider(chainedProvider, planner)); - // Executor is required for constant-reduction rules; see [CALCITE-566] - final RexExecutorImpl executor = - new RexExecutorImpl(Schemas.createDataContext(null)); - basePlan.getCluster().getPlanner().setExecutor(executor); + planner.setRoot(basePlan); + optimizedRelNode = planner.findBestExp(); + + return optimizedRelNode; + } + + /** + * Run the HEP Planner with the given rule set. + * + * @param basePlan + * @param followPlanChanges + * @param mdProvider + * @param executorProvider + * @param order + * @param rules + * @return optimized RelNode + */ + private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, + RelMetadataProvider mdProvider, Executor executorProvider, HepMatchOrder order, + RelOptRule... rules) { + + RelNode optimizedRelNode = basePlan; + HepProgramBuilder programBuilder = new HepProgramBuilder(); + if (followPlanChanges) { + programBuilder.addMatchOrder(order); + programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules)); + } else { + // TODO: Should this be also TOP_DOWN? + for (RelOptRule r : rules) + programBuilder.addRuleInstance(r); + } + + HiveRulesRegistry registry = new HiveRulesRegistry(); + HiveHepPlannerContext context = new HiveHepPlannerContext(registry); + HepPlanner planner = new HepPlanner(programBuilder.build(), context); + + List list = Lists.newArrayList(); + list.add(mdProvider); + planner.registerMetadataProviders(list); + RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); + basePlan.getCluster().setMetadataProvider( + new CachingRelMetadataProvider(chainedProvider, planner)); + + if (executorProvider != null) { + basePlan.getCluster().getPlanner().setExecutor(executorProvider); + } planner.setRoot(basePlan); optimizedRelNode = planner.findBestExp(); diff --git a/ql/src/test/results/clientpositive/cast1.q.out b/ql/src/test/results/clientpositive/cast1.q.out index 0bdecba..48a0c14 100644 --- a/ql/src/test/results/clientpositive/cast1.q.out +++ b/ql/src/test/results/clientpositive/cast1.q.out @@ -105,11 +105,11 @@ POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT 3 + 2, 3.0 + 2, 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.c1 EXPRESSION [] -POSTHOOK: Lineage: dest1.c2 EXPRESSION [] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [] -POSTHOOK: Lineage: dest1.c4 EXPRESSION [] -POSTHOOK: Lineage: dest1.c5 EXPRESSION [] +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: dest1.c2 SIMPLE [] +POSTHOOK: Lineage: dest1.c3 SIMPLE [] +POSTHOOK: Lineage: dest1.c4 SIMPLE [] +POSTHOOK: Lineage: dest1.c5 SIMPLE [] POSTHOOK: Lineage: dest1.c6 EXPRESSION [] POSTHOOK: Lineage: dest1.c7 EXPRESSION [] PREHOOK: query: select dest1.* FROM dest1 diff --git a/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out b/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out index 9fc1e7b..ad68987 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out @@ -402,7 +402,7 @@ PREHOOK: query: select 3 * 5 from dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### 
-{"version":"1.0","engine":"mr","hash":"753abad4d55afd3df34fdc73abfcd44d","queryText":"select 3 * 5 from dest1","edges":[{"sources":[],"targets":[0],"expression":"(3 * 5)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"_c0"}]} +{"version":"1.0","engine":"mr","hash":"753abad4d55afd3df34fdc73abfcd44d","queryText":"select 3 * 5 from dest1","edges":[{"sources":[],"targets":[0],"expression":"15","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"_c0"}]} 15 15 15 diff --git a/ql/src/test/results/clientpositive/create_genericudf.q.out b/ql/src/test/results/clientpositive/create_genericudf.q.out index 586f0ba..db3a9b5 100644 --- a/ql/src/test/results/clientpositive/create_genericudf.q.out +++ b/ql/src/test/results/clientpositive/create_genericudf.q.out @@ -50,13 +50,13 @@ SELECT POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.c1 EXPRESSION [] -POSTHOOK: Lineage: dest1.c2 EXPRESSION [] +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: dest1.c2 SIMPLE [] POSTHOOK: Lineage: dest1.c3 EXPRESSION [] POSTHOOK: Lineage: dest1.c4 EXPRESSION [] POSTHOOK: Lineage: dest1.c5 EXPRESSION [] -POSTHOOK: Lineage: dest1.c6 EXPRESSION [] -POSTHOOK: Lineage: dest1.c7 EXPRESSION [] +POSTHOOK: Lineage: dest1.c6 SIMPLE [] +POSTHOOK: Lineage: dest1.c7 SIMPLE [] PREHOOK: query: SELECT dest1.* FROM dest1 LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git a/ql/src/test/results/clientpositive/cte_2.q.out b/ql/src/test/results/clientpositive/cte_2.q.out index a8bc760..d6923ba 100644 --- a/ql/src/test/results/clientpositive/cte_2.q.out +++ b/ql/src/test/results/clientpositive/cte_2.q.out @@ -40,7 +40,7 @@ select * POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@s1 -POSTHOOK: Lineage: s1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: s1.key SIMPLE [] POSTHOOK: Lineage: s1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from s1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_ppd.q.out b/ql/src/test/results/clientpositive/groupby_ppd.q.out index 6164a26..e3e4a50 100644 --- a/ql/src/test/results/clientpositive/groupby_ppd.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppd.q.out @@ -28,16 +28,16 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: foo (type: int) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Union Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: 1 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int) + outputColumnNames: _col1 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: _col0 (type: int), _col1 (type: int) + keys: 1 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -54,16 +54,16 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: foo (type: int) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Union Statistics: Num rows: 2 Data size: 0 
Basic stats: PARTIAL Column stats: NONE
             Select Operator
-              expressions: 1 (type: int), _col1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: _col0 (type: int)
+              outputColumnNames: _col1
               Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Group By Operator
-                keys: _col0 (type: int), _col1 (type: int)
+                keys: 1 (type: int), _col1 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -79,7 +79,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Select Operator
-            expressions: _col1 (type: int), _col0 (type: int)
+            expressions: _col1 (type: int), 1 (type: int)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             File Output Operator
diff --git a/ql/src/test/results/clientpositive/input_part6.q.out b/ql/src/test/results/clientpositive/input_part6.q.out
index fa51cdf..c01d8af 100644
--- a/ql/src/test/results/clientpositive/input_part6.q.out
+++ b/ql/src/test/results/clientpositive/input_part6.q.out
@@ -19,7 +19,7 @@ STAGE PLANS:
               predicate: (UDFToDouble(ds) = 1996.0) (type: boolean)
               Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: key (type: string), value (type: string), '1996' (type: string), hr (type: string)
+                expressions: key (type: string), value (type: string), '1996.0' (type: string), hr (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                 Limit
diff --git a/ql/src/test/results/clientpositive/insert1.q.out b/ql/src/test/results/clientpositive/insert1.q.out
index 49dd2d5..7a2c429 100644
--- a/ql/src/test/results/clientpositive/insert1.q.out
+++ b/ql/src/test/results/clientpositive/insert1.q.out
@@ -26,7 +26,7 @@ POSTHOOK: query: insert overwrite table insert1 select a.key, a.value from inser
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@insert2
 POSTHOOK: Output: default@insert1
-POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: insert1.key SIMPLE []
 POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
 PREHOOK: query: explain insert into table insert1 select a.key, a.value from insert2 a WHERE (a.key=-1)
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/lineage2.q.out b/ql/src/test/results/clientpositive/lineage2.q.out
index 4184a83..97f9f0e 100644
--- a/ql/src/test/results/clientpositive/lineage2.q.out
+++ b/ql/src/test/results/clientpositive/lineage2.q.out
@@ -402,7 +402,7 @@ PREHOOK: query: select 3 * 5 from dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"753abad4d55afd3df34fdc73abfcd44d","queryText":"select 3 * 5 from dest1","edges":[{"sources":[],"targets":[0],"expression":"(3 * 5)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"c0"}]}
+{"version":"1.0","engine":"mr","hash":"753abad4d55afd3df34fdc73abfcd44d","queryText":"select 3 * 5 from dest1","edges":[{"sources":[],"targets":[0],"expression":"15","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"c0"}]}
 15
 15
 15
diff --git a/ql/src/test/results/clientpositive/lineage3.q.out b/ql/src/test/results/clientpositive/lineage3.q.out
index ad965c8..266d50f 100644
--- a/ql/src/test/results/clientpositive/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/lineage3.q.out
@@ -166,7 +166,7 @@ where key in (select key+18 from src1) order by key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src1
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"8b9d63653e36ecf4dd425d3cc3de9199","queryText":"select key, value from src1\nwhere key in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + UDFToDouble(18)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]}
+{"version":"1.0","engine":"mr","hash":"8b9d63653e36ecf4dd425d3cc3de9199","queryText":"select key, value from src1\nwhere key in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + UDFToDouble(18)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]}
 146	val_146
 273	val_273
 PREHOOK: query: select * from src1 a
@@ -178,15 +178,15 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 PREHOOK: Input: default@src1
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > UDFToDouble(300))","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
+{"version":"1.0","engine":"mr","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > UDFToDouble(300))","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
 311	val_311
-Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select key, value from src1
 where key not in (select key+18 from src1) order by key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src1
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + UDFToDouble(18)) is null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(count(*) = 0)","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"true","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + UDFToDouble(18)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]}
+{"version":"1.0","engine":"mr","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + UDFToDouble(18)) is null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(count(*) = 0)","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"true","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + UDFToDouble(18)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]}
 PREHOOK: query: select * from src1 a
 where not exists
   (select cint from alltypesorc b
@@ -196,7 +196,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 PREHOOK: Input: default@src1
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"53191056e05af9080a30de853e8cea9c","queryText":"select * from src1 a\nwhere not exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > UDFToDouble(300))","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(UDFToInteger(b.ctinyint) + 300) is null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
+{"version":"1.0","engine":"mr","hash":"53191056e05af9080a30de853e8cea9c","queryText":"select * from src1 a\nwhere not exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > UDFToDouble(300))","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(UDFToInteger(b.ctinyint) + 300) is null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
 369
 401	val_401
 406	val_406
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
index d46b0ae..273234f 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
@@ -690,12 +690,10 @@ STAGE PLANS:
               predicate: (x = 484) (type: boolean)
               Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: 484 (type: int)
-                outputColumnNames: _col0
                 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count(1)
-                  keys: _col0 (type: int)
+                  keys: 484 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out
index 0d4cd15..0ec2ea0 100644
--- a/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out
@@ -768,22 +768,22 @@ STAGE PLANS:
             alias: orc_pred
             Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
-              Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean)
+              Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col3 (type: string)
                   sort order: -
-                  Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
       Reduce Operator Tree:
         Select Operator
           expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 3
             Statistics: Num rows: 3 Data size: 888 Basic stats: COMPLETE Column stats: NONE
@@ -833,25 +833,25 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: orc_pred
-            filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+            filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean)
             Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
-              Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean)
+              Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col3 (type: string)
                   sort order: -
-                  Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
       Reduce Operator Tree:
         Select Operator
           expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 4 Data size: 1186 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 5 Data size: 1483 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 3
             Statistics: Num rows: 3 Data size: 888 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
index aa3b272..eb5feec 100644
--- a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
@@ -756,22 +756,22 @@ STAGE PLANS:
             alias: tbl_pred
             Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
-              Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean)
+              Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col3 (type: string)
                   sort order: -
-                  Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
       Reduce Operator Tree:
         Select Operator
           expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 3
             Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
@@ -821,25 +821,25 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: tbl_pred
-            filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+            filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean)
             Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
-              Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean)
+              Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col3 (type: string)
                   sort order: -
-                  Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
       Reduce Operator Tree:
         Select Operator
           expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 3
             Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out
index d677327..eb8ffe3 100644
--- a/ql/src/test/results/clientpositive/pointlookup2.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -68,7 +68,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@pcr_t1
 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
 POSTHOOK: Output: default@pcr_t2
-POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.ds SIMPLE []
 POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
 PREHOOK: query: from pcr_t1
@@ -83,8 +83,8 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@pcr_t1
 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
 POSTHOOK: Output: default@pcr_t2
-POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
-POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.ds SIMPLE []
+POSTHOOK: Lineage: pcr_t2.key SIMPLE []
 POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
 PREHOOK: query: explain extended
 select key, value, ds
@@ -165,7 +165,7 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+              predicate: (((key = 1) or (key = 2)) and (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean)
               Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string), ds (type: string)
@@ -271,8 +271,8 @@ STAGE PLANS:
               name: default.pcr_t1
             name: default.pcr_t1
       Truncated Path -> Alias:
-        /pcr_t1/ds=2000-04-08 [pcr_t1]
-        /pcr_t1/ds=2000-04-09 [pcr_t1]
+        /pcr_t1/ds=2000-04-08 [$hdt$_0:pcr_t1]
+        /pcr_t1/ds=2000-04-09 [$hdt$_0:pcr_t1]
       Needs Tagging: false
       Reduce Operator Tree:
         Select Operator
diff --git a/ql/src/test/results/clientpositive/quotedid_basic.q.out b/ql/src/test/results/clientpositive/quotedid_basic.q.out
index 50c83a8..f9dc0e0 100644
--- a/ql/src/test/results/clientpositive/quotedid_basic.q.out
+++ b/ql/src/test/results/clientpositive/quotedid_basic.q.out
@@ -101,11 +101,11 @@ STAGE PLANS:
             predicate: (!@#$%^&*()_q = '1') (type: boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
-              expressions: x+1 (type: string), y&y (type: string), '1' (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: x+1 (type: string), y&y (type: string)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                keys: _col0 (type: string), _col1 (type: string), '1' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -156,11 +156,11 @@ STAGE PLANS:
             predicate: (!@#$%^&*()_q = '1') (type: boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
-              expressions: x+1 (type: string), y&y (type: string), '1' (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: x+1 (type: string), y&y (type: string)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                keys: _col0 (type: string), _col1 (type: string), '1' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -260,11 +260,11 @@ STAGE PLANS:
             predicate: (!@#$%^&*()_q = '1') (type: boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
-              expressions: x+1 (type: string), y&y (type: string), '1' (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: x+1 (type: string), y&y (type: string)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                keys: _col0 (type: string), _col1 (type: string), '1' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/quotedid_partition.q.out b/ql/src/test/results/clientpositive/quotedid_partition.q.out
index bc52c82..30cfce3 100644
--- a/ql/src/test/results/clientpositive/quotedid_partition.q.out
+++ b/ql/src/test/results/clientpositive/quotedid_partition.q.out
@@ -46,11 +46,11 @@ STAGE PLANS:
             predicate: (x+1 = '10') (type: boolean)
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: '10' (type: string), y&y (type: string), 'a' (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: y&y (type: string)
+              outputColumnNames: _col1
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                keys: '10' (type: string), _col1 (type: string), 'a' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_18.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_18.q.out
index 6106188..4b29056 100644
--- a/ql/src/test/results/clientpositive/smb_mapjoin_18.q.out
+++ b/ql/src/test/results/clientpositive/smb_mapjoin_18.q.out
@@ -238,17 +238,28 @@ STAGE PLANS:
               predicate: (key = 238) (type: boolean)
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: key (type: int), value (type: string)
+                expressions: 238 (type: int), value (type: string)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.test_table2
+                  value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.test_table2
   Stage: Stage-0
     Move Operator
@@ -277,7 +288,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test_table1
 POSTHOOK: Input: default@test_table1@ds=1
 POSTHOOK: Output: default@test_table2@ds=2
-POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE []
 POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
 PREHOOK: query: select count(*) from test_table2 where ds = '2'
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/stats_empty_partition.q.out b/ql/src/test/results/clientpositive/stats_empty_partition.q.out
index c13817e..202263e 100644
--- a/ql/src/test/results/clientpositive/stats_empty_partition.q.out
+++ b/ql/src/test/results/clientpositive/stats_empty_partition.q.out
@@ -20,7 +20,7 @@ POSTHOOK: query: insert overwrite table tmptable partition (part = '1') select *
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@tmptable@part=1
-POSTHOOK: Lineage: tmptable PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tmptable PARTITION(part=1).key SIMPLE []
 POSTHOOK: Lineage: tmptable PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: describe formatted tmptable partition (part = '1')
 PREHOOK: type: DESCTABLE
diff --git a/ql/src/test/results/clientpositive/subquery_notin.q.out b/ql/src/test/results/clientpositive/subquery_notin.q.out
index 5563794..ac9c174 100644
--- a/ql/src/test/results/clientpositive/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notin.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: -- non agg, non corr
 explain
 select *
@@ -151,7 +151,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select *
 from src
 where src.key not in ( select key from src s1 where s1.key > '2')
@@ -285,7 +285,7 @@ POSTHOOK: Input: default@src
 199	val_199
 199	val_199
 2	val_2
-Warning: Shuffle Join JOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: -- non agg, corr
 explain
 select p_mfgr, b.p_name, p_size
@@ -528,7 +528,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select p_mfgr, b.p_name, p_size
 from part b where b.p_name not in
@@ -567,7 +567,7 @@ Manufacturer#4	almond azure aquamarine papaya violet	12
 Manufacturer#5	almond antique blue firebrick mint	31
 Manufacturer#5	almond aquamarine dodger light gainsboro	46
 Manufacturer#5	almond azure blanched chiffon midnight	23
-Warning: Shuffle Join JOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: -- agg, non corr
 explain
 select p_name, p_size
@@ -843,7 +843,7 @@ STAGE PLANS:
      Processor Tree:
        ListSink
 
-Warning: Shuffle Join JOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: select p_name, p_size
 from part where part.p_size not in
@@ -890,7 +890,7 @@ almond aquamarine sandy cyan gainsboro	18
 almond aquamarine yellow dodger mint	7
 almond azure aquamarine papaya violet	12
 almond azure blanched chiffon midnight	23
-Warning: Shuffle Join JOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: -- agg, corr
 explain
 select p_mfgr, p_name, p_size
@@ -1202,7 +1202,7 @@ STAGE PLANS:
      Processor Tree:
        ListSink
 
-Warning: Shuffle Join JOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select p_mfgr, p_name, p_size
 from part b where b.p_size not in
 (select min(p_size)
@@ -1278,7 +1278,7 @@ POSTHOOK: Input: default@lineitem
 139636	1
 175839	1
 182052	1
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: -- alternate not in syntax
 select * from src
@@ -1442,7 +1442,7 @@ POSTHOOK: Input: default@src
 POSTHOOK: Input: default@t1_v
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@T2_v
-Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain
 select * from T1_v where T1_v.key not in (select T2_v.key from T2_v)
@@ -1587,7 +1587,7 @@ STAGE PLANS:
      Processor Tree:
        ListSink
 
-Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v)
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
index 9689ae3..7d23be3 100644
--- a/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: -- non agg, non corr
 -- JAVA_VERSION_SPECIFIC_OUTPUT
@@ -188,7 +188,7 @@ STAGE PLANS:
      Processor Tree:
        ListSink
 
-Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: -- non agg, corr
 explain
 select b.p_mfgr, min(p_retailprice)
@@ -445,7 +445,7 @@ STAGE PLANS:
      Processor Tree:
        ListSink
 
-Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: select b.p_mfgr, min(p_retailprice)
 from part b group by b.p_mfgr
@@ -470,7 +470,7 @@ POSTHOOK: Input: default@part
 #### A masked pattern was here ####
 Manufacturer#1	1173.15
 Manufacturer#2	1690.68
-Warning: Shuffle Join JOIN[35][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[34][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product
 PREHOOK: query: -- agg, non corr
 explain
 select b.p_mfgr, min(p_retailprice)
@@ -733,7 +733,7 @@ STAGE PLANS:
      Processor Tree:
        ListSink
 
-Warning: Shuffle Join JOIN[35][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[34][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product
 PREHOOK: query: select b.p_mfgr, min(p_retailprice)
 from part b group by b.p_mfgr
diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
index 908ad39..ede3320 100644
--- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
+++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
@@ -773,7 +773,7 @@ STAGE PLANS:
      Processor Tree:
        ListSink
 
-Warning: Shuffle Join JOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: -- non agg, corr
 explain
 select p_mfgr, b.p_name, p_size
diff --git a/ql/src/test/results/clientpositive/subquery_views.q.out b/ql/src/test/results/clientpositive/subquery_views.q.out
index 470fa83..2afd7ff 100644
--- a/ql/src/test/results/clientpositive/subquery_views.q.out
+++ b/ql/src/test/results/clientpositive/subquery_views.q.out
@@ -111,8 +111,8 @@ where `b`.`key` not in
 from `default`.`src` `a`
 where `b`.`value` = `a`.`value` and `a`.`key` = `b`.`key` and `a`.`value` > 'val_11'
 ), tableType:VIRTUAL_VIEW)
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[46][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
 PREHOOK: query: explain
 select *
 from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
@@ -420,8 +420,8 @@ STAGE PLANS:
      Processor Tree:
        ListSink
 
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[46][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
 PREHOOK: query: select *
 from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/udf1.q.out b/ql/src/test/results/clientpositive/udf1.q.out
index dffbccf..b3b694b 100644
--- a/ql/src/test/results/clientpositive/udf1.q.out
+++ b/ql/src/test/results/clientpositive/udf1.q.out
@@ -137,26 +137,26 @@ POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT 'a' LIKE '%a%', 'b
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
-POSTHOOK: Lineage: dest1.c1 EXPRESSION []
-POSTHOOK: Lineage: dest1.c10 EXPRESSION []
-POSTHOOK: Lineage: dest1.c11 EXPRESSION []
-POSTHOOK: Lineage: dest1.c12 EXPRESSION []
-POSTHOOK: Lineage: dest1.c13 EXPRESSION []
-POSTHOOK: Lineage: dest1.c14 EXPRESSION []
-POSTHOOK: Lineage: dest1.c15 EXPRESSION []
-POSTHOOK: Lineage: dest1.c16 EXPRESSION []
-POSTHOOK: Lineage: dest1.c17 EXPRESSION []
-POSTHOOK: Lineage: dest1.c18 EXPRESSION []
-POSTHOOK: Lineage: dest1.c19 EXPRESSION []
-POSTHOOK: Lineage: dest1.c2 EXPRESSION []
-POSTHOOK: Lineage: dest1.c20 EXPRESSION []
-POSTHOOK: Lineage: dest1.c3 EXPRESSION []
-POSTHOOK: Lineage: dest1.c4 EXPRESSION []
-POSTHOOK: Lineage: dest1.c5 EXPRESSION []
-POSTHOOK: Lineage: dest1.c6 EXPRESSION []
-POSTHOOK: Lineage: dest1.c7 EXPRESSION []
-POSTHOOK: Lineage: dest1.c8 EXPRESSION []
-POSTHOOK: Lineage: dest1.c9 EXPRESSION []
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest1.c10 SIMPLE []
+POSTHOOK: Lineage: dest1.c11 SIMPLE []
+POSTHOOK: Lineage: dest1.c12 SIMPLE []
+POSTHOOK: Lineage: dest1.c13 SIMPLE []
+POSTHOOK: Lineage: dest1.c14 SIMPLE []
+POSTHOOK: Lineage: dest1.c15 SIMPLE []
+POSTHOOK: Lineage: dest1.c16 SIMPLE []
+POSTHOOK: Lineage: dest1.c17 SIMPLE []
+POSTHOOK: Lineage: dest1.c18 SIMPLE []
+POSTHOOK: Lineage: dest1.c19 SIMPLE []
+POSTHOOK: Lineage: dest1.c2 SIMPLE []
+POSTHOOK: Lineage: dest1.c20 SIMPLE []
+POSTHOOK: Lineage: dest1.c3 SIMPLE []
+POSTHOOK: Lineage: dest1.c4 SIMPLE []
+POSTHOOK: Lineage: dest1.c5 SIMPLE []
+POSTHOOK: Lineage: dest1.c6 SIMPLE []
+POSTHOOK: Lineage: dest1.c7 SIMPLE []
+POSTHOOK: Lineage: dest1.c8 SIMPLE []
+POSTHOOK: Lineage: dest1.c9 SIMPLE []
 PREHOOK: query: SELECT dest1.* FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
diff --git a/ql/src/test/results/clientpositive/udf_10_trims.q.out b/ql/src/test/results/clientpositive/udf_10_trims.q.out
index 2f79723..3a5303a 100644
--- a/ql/src/test/results/clientpositive/udf_10_trims.q.out
+++ b/ql/src/test/results/clientpositive/udf_10_trims.q.out
@@ -117,4 +117,4 @@ WHERE src.key = 86
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
-POSTHOOK: Lineage: dest1.c1 EXPRESSION []
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
diff --git a/ql/src/test/results/clientpositive/udf_concat_insert2.q.out b/ql/src/test/results/clientpositive/udf_concat_insert2.q.out
index f1b70fe..d68bd76 100644
--- a/ql/src/test/results/clientpositive/udf_concat_insert2.q.out
+++ b/ql/src/test/results/clientpositive/udf_concat_insert2.q.out
@@ -16,7 +16,7 @@ INSERT OVERWRITE TABLE dest1 SELECT concat('1234', 'abc', 'extra argument'), src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
-POSTHOOK: Lineage: dest1.key EXPRESSION []
+POSTHOOK: Lineage: dest1.key SIMPLE []
 POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT dest1.* FROM dest1
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/union_date_trim.q.out b/ql/src/test/results/clientpositive/union_date_trim.q.out
index e2f5269..324e8b7 100644
--- a/ql/src/test/results/clientpositive/union_date_trim.q.out
+++ b/ql/src/test/results/clientpositive/union_date_trim.q.out
@@ -51,4 +51,4 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@testdate
 POSTHOOK: Output: default@testdate
 POSTHOOK: Lineage: testdate.dt EXPRESSION [(testdate)testdate.FieldSchema(name:dt, type:date, comment:null), ]
-POSTHOOK: Lineage: testdate.id EXPRESSION [(testdate)testdate.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: testdate.id EXPRESSION []
diff --git a/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out b/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
index 1562087..b3fc7d3 100644
--- a/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
+++ b/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
@@ -527,7 +527,7 @@ STAGE PLANS:
             predicate: ((if(true, f1, f2) = 1) and (f1 = 1)) (type: boolean)
             Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: 1 (type: int)
+              expressions: if(true, 1, f2) (type: int)
               outputColumnNames: _col0
               Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
               Union
@@ -550,7 +550,7 @@ STAGE PLANS:
             predicate: false (type: boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
             Select Operator
-              expressions: 1 (type: int)
+              expressions: 0 (type: int)
               outputColumnNames: _col0
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
               Union
diff --git a/ql/src/test/results/clientpositive/vector_decimal_round.q.out b/ql/src/test/results/clientpositive/vector_decimal_round.q.out
index 25e5cfa..ec6226e 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_round.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_round.q.out
@@ -106,7 +106,7 @@ STAGE PLANS:
               outputColumnNames: _col0
               Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: round(_col0, (- 1)) (type: decimal(11,0))
+                key expressions: round(_col0, -1) (type: decimal(11,0))
                 sort order: +
                 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: decimal(10,0))
@@ -242,7 +242,7 @@ STAGE PLANS:
               outputColumnNames: _col0
               Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: round(_col0, (- 1)) (type: decimal(11,0))
+                key expressions: round(_col0, -1) (type: decimal(11,0))
                 sort order: +
                 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: decimal(10,0))
@@ -379,7 +379,7 @@ STAGE PLANS:
               outputColumnNames: _col0
               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: round(_col0, (- 1)) (type: decimal(11,0))
+                key expressions: round(_col0, -1) (type: decimal(11,0))
                 sort order: +
                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: decimal(10,0))