diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 55c71dd..74ca5d7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -81,6 +81,13 @@ public void initialize(HiveConf hiveConf) {
       transformations.add(new Generator());
     }
 
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION) &&
+        pctx.getContext().isCboSucceeded()) {
+      // If CBO succeeded, we still need to do some simplifications in the
+      // predicates e.g. fold cast on constant
+      transformations.add(new SimpleConstantReduction());
+    }
+
     // Try to transform OR predicates in Filter into simpler IN clauses first
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER) &&
         !pctx.getContext().isCboSucceeded()) {
@@ -129,13 +136,11 @@ public void initialize(HiveConf hiveConf) {
         /* Add list bucketing pruner. */
         transformations.add(new ListBucketingPruner());
       }
-    }
-    if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)
-        && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) ||
-        (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)
-        && pctx.getContext().isCboSucceeded())) {
-      // PartitionPruner may create more folding opportunities, run ConstantPropagate again.
-      transformations.add(new ConstantPropagate());
+      if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION) &&
+          !pctx.getContext().isCboSucceeded()) {
+        // PartitionPruner may create more folding opportunities, run ConstantPropagate again.
+        transformations.add(new ConstantPropagate());
+      }
     }
 
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGROUPBY) ||
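Editorial note on the two hunks above: the old code scheduled the ConstantPropagate re-run behind an OR of flag combinations; the new code splits cleanly on whether CBO ran. A condensed paraphrase of the resulting gating (a sketch, not patch code; it ignores the enclosing partition-pruning block for brevity):

    boolean constProp = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION);
    boolean cboSucceeded = pctx.getContext().isCboSucceeded();

    if (constProp && cboSucceeded) {
      // CBO has already folded constants; only a light cleanup is needed,
      // e.g. folding casts over constants reintroduced by plan translation.
      transformations.add(new SimpleConstantReduction());
    }
    if (constProp && !cboSucceeded) {
      // No CBO folding happened; run the full ConstantPropagate pass, and
      // run it again after the PartitionPruner creates new opportunities.
      transformations.add(new ConstantPropagate());
    }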
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleConstantReduction.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleConstantReduction.java
new file mode 100644
index 0000000..82ae1ee
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleConstantReduction.java
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ForwardWalker;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Lightweight constant folding pass for plans produced by CBO: folds constant
+ * subtrees in Filter predicates and Select expressions, and removes Filter
+ * operators whose predicate folds to true.
+ */
+public class SimpleConstantReduction extends Transform {
+
+  private static final Logger LOG = LoggerFactory.getLogger(SimpleConstantReduction.class);
+
+
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+
+    SimpleConstantReductionCtx scrCtx = new SimpleConstantReductionCtx();
+
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), new FilterTransformer());
+    opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + "%"), new SelectTransformer());
+
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, scrCtx);
+    GraphWalker ogw = new ForwardWalker(disp);
+
+    List<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+
+    for (Operator<? extends Serializable> opToDelete : scrCtx.getOpToDelete()) {
+      if (opToDelete.getParentOperators() == null || opToDelete.getParentOperators().size() != 1) {
+        throw new RuntimeException("Error pruning operator " + opToDelete
+            + ". It should have only 1 parent.");
+      }
+      opToDelete.getParentOperators().get(0).removeChildAndAdoptItsChildren(opToDelete);
+    }
+    scrCtx.getOpToDelete().clear();
+
+    return pctx;
+  }
+
+
+  private abstract class Transformer implements NodeProcessor {
+
+    protected ExprNodeDesc reduceExpr(ExprNodeDesc predicate) throws SemanticException {
+      if (predicate instanceof ExprNodeGenericFuncDesc) {
+        ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) predicate;
+
+        // If it is a constant tree, we transform
+        if (ExprNodeDescUtils.isConstant(fd)) {
+          ExprNodeDesc newPred = ConstantPropagateProcFactory.foldExpr(fd);
+          if (newPred != null) {
+            return newPred;
+          }
+          return fd;
+        }
+
+        for(int i = 0; i < fd.getChildren().size(); i++) {
+          ExprNodeDesc newChild = reduceExpr(fd.getChildren().get(i));
+          fd.getChildren().set(i, newChild);
+        }
+      }
+      return predicate;
+    }
+  }
+
+  private class FilterTransformer extends Transformer {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      FilterOperator filterOp = (FilterOperator) nd;
+      ExprNodeDesc predicate = filterOp.getConf().getPredicate();
+      ExprNodeDesc newPredicate = reduceExpr(predicate);
+      filterOp.getConf().setPredicate(newPredicate);
+      if (newPredicate instanceof ExprNodeConstantDesc) {
+        ExprNodeConstantDesc c = (ExprNodeConstantDesc) newPredicate;
+        if (Boolean.TRUE.equals(c.getValue())) {
+          SimpleConstantReductionCtx scrCtx = (SimpleConstantReductionCtx) procCtx;
+          scrCtx.addOpToDelete(filterOp);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Filter expression " + newPredicate + " holds true. Will delete it.");
+          }
+        } else if (Boolean.FALSE.equals(c.getValue())) {
+          if (LOG.isWarnEnabled()) {
+            LOG.warn("Filter expression " + newPredicate + " holds false!");
+          }
+        }
+      }
+      return null;
+    }
+
+  }
+
+  private class SelectTransformer extends Transformer {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      SelectOperator selectOp = (SelectOperator) nd;
+      List<ExprNodeDesc> columns = selectOp.getConf().getColList();
+      for (int i = 0; i < columns.size(); i++) {
+        ExprNodeDesc column = columns.get(i);
+        ExprNodeDesc newColumn = reduceExpr(column);
+        columns.set(i, newColumn);
+      }
+      if (selectOp.getColumnExprMap() != null) {
+        for (Entry<String, ExprNodeDesc> entry : selectOp.getColumnExprMap().entrySet()) {
+          entry.setValue(reduceExpr(entry.getValue()));
+        }
+      }
+      return null;
+    }
+
+  }
+
+  protected class SimpleConstantReductionCtx implements NodeProcessorCtx {
+    private final Set<Operator<? extends Serializable>> opToDelete;
+
+    public SimpleConstantReductionCtx() {
+      this.opToDelete = new HashSet<Operator<? extends Serializable>>();
+    }
+
+    public void addOpToDelete(Operator<? extends Serializable> op) {
+      opToDelete.add(op);
+    }
+
+    public Set<Operator<? extends Serializable>> getOpToDelete() {
+      return opToDelete;
+    }
+
+  }
+
+}
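To illustrate what the new transform does, the sketch below builds a constant-only tree and folds it with the same helpers reduceExpr calls. This is editorial, not patch code: it assumes Guava's Lists is available and elides the UDFArgumentException that newInstance declares.

    // upper('ab') is a deterministic UDF over constant children.
    ExprNodeDesc call = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFUpper(),
        Lists.newArrayList((ExprNodeDesc) new ExprNodeConstantDesc("ab")));
    if (ExprNodeDescUtils.isConstant(call)) {
      ExprNodeDesc folded = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc) call);
      // folded should be an ExprNodeConstantDesc holding "AB"; a null return
      // means the fold could not be performed and the original tree is kept.
    }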
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index adfbb67..a20db01 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -55,6 +55,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
@@ -287,7 +288,7 @@ private boolean allStaticPartitions(Operator<? extends OperatorDesc> op,
       }
       if (op.getColumnExprMap() != null) {
         for(String dpCol : dpCols) {
-          ExprNodeDesc end = findConstantExprOrigin(dpCol, op);
+          ExprNodeDesc end = ExprNodeDescUtils.findConstantExprOrigin(dpCol, op);
           if (!(end instanceof ExprNodeConstantDesc)) {
             return false;
           }
@@ -298,37 +299,6 @@ private boolean allStaticPartitions(Operator<? extends OperatorDesc> op,
       return true;
     }
 
-    // Find the constant origin of a certain column if it is originated from a constant
-    // Otherwise, it returns the expression that originated the column
-    private ExprNodeDesc findConstantExprOrigin(String dpCol, Operator<? extends OperatorDesc> op) {
-      ExprNodeDesc expr = op.getColumnExprMap().get(dpCol);
-      ExprNodeDesc foldedExpr;
-      // If it is a function, we try to fold it
-      if (expr instanceof ExprNodeGenericFuncDesc) {
-        foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)expr);
-        if (foldedExpr == null) {
-          foldedExpr = expr;
-        }
-      } else {
-        foldedExpr = expr;
-      }
-      // If it is a column reference, we will try to resolve it
-      if (foldedExpr instanceof ExprNodeColumnDesc) {
-        Operator<? extends OperatorDesc> originOp = null;
-        for(Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) {
-          if (parentOp.getColumnExprMap() != null) {
-            originOp = parentOp;
-            break;
-          }
-        }
-        if (originOp != null) {
-          return findConstantExprOrigin(((ExprNodeColumnDesc)foldedExpr).getColumn(), originOp);
-        }
-      }
-      // Otherwise, we return the expression
-      return foldedExpr;
-    }
-
     // Remove RS and SEL introduced by enforce bucketing/sorting config
     // Convert PARENT -> RS -> SEL -> FS to PARENT -> FS
     private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 0cfd529..7febfd5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -22,7 +22,6 @@
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -30,8 +29,6 @@
 import java.util.Set;
 import java.util.Stack;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
@@ -67,6 +64,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
@@ -81,6 +79,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.Lists;
 
@@ -213,13 +213,17 @@ private Long getNullcountFor(StatType type, ColumnStatisticsData statData) {
 
   private boolean hasNullOrConstantGbyKey(GroupByOperator gbyOp) {
     GroupByDesc gbyDesc = gbyOp.getConf();
+    int numCols = gbyDesc.getOutputColumnNames().size();
+    int aggCols = gbyDesc.getAggregators().size();
     // If the Group by operator has null key
-    if (gbyDesc.getOutputColumnNames().size() ==
-        gbyDesc.getAggregators().size()) {
+    if (numCols == aggCols) {
       return true;
     }
-    for (ExprNodeDesc en :gbyDesc.getKeys()) {
-      if (!(en instanceof ExprNodeConstantDesc)) {
+    // If the Gby key is a constant
+    List<String> dpCols = gbyOp.getSchema().getColumnNames().subList(0, numCols - aggCols);
+    for(String dpCol : dpCols) {
+      ExprNodeDesc end = ExprNodeDescUtils.findConstantExprOrigin(dpCol, gbyOp);
+      if (!(end instanceof ExprNodeConstantDesc)) {
         return false;
       }
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregatePullUpConstantsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregatePullUpConstantsRule.java
new file mode 100644
index 0000000..370c0ec
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregatePullUpConstantsRule.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.rules.AggregateProjectPullUpConstantsRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+
+public class HiveAggregatePullUpConstantsRule extends AggregateProjectPullUpConstantsRule {
+
+  public static final HiveAggregatePullUpConstantsRule INSTANCE =
+      new HiveAggregatePullUpConstantsRule();
+
+  public HiveAggregatePullUpConstantsRule() {
+    super(HiveAggregate.class, RelNode.class,
+        HiveRelFactories.HIVE_BUILDER, "HiveAggregatePullUpConstantsRule");
+  }
+
+  @Override
+  public boolean matches(RelOptRuleCall call) {
+    final Aggregate aggregate = call.rel(0);
+    // Rule cannot be applied if there are GroupingSets
+    if (aggregate.indicator) {
+      return false;
+    }
+    return super.matches(call);
+  }
+
+}
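Reviewers who want to exercise the new rule in isolation can drive it with a plain Calcite HepPlanner; a sketch under the assumption of stock Calcite APIs, where relNode stands for the plan under test:

    HepProgramBuilder programBuilder = new HepProgramBuilder();
    programBuilder.addRuleInstance(HiveAggregatePullUpConstantsRule.INSTANCE);
    HepPlanner planner = new HepPlanner(programBuilder.build());
    planner.setRoot(relNode);
    // Constant GROUP BY keys are removed from the Aggregate and restored by
    // a Project placed on top of it.
    RelNode optimized = planner.findBestExp();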
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
index d43c2c6..be59809 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
@@ -18,8 +18,10 @@
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.calcite.plan.RelOptRuleCall;
@@ -36,11 +38,13 @@
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexOver;
 import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 
+
 public class HiveFilterProjectTransposeRule extends FilterProjectTransposeRule {
 
   public static final HiveFilterProjectTransposeRule INSTANCE_DETERMINISTIC_WINDOWING =
@@ -155,9 +159,9 @@ private static RelNode getNewProject(RexNode filterCondToPushBelowProj, RexNode
     }
 
     RelNode newPushedFilterRel = relBuilder.push(oldProj.getInput()).filter(newPushedCondition).build();
-
+    List<RexNode> newProjects = rewriteProjects(oldProj.getProjects(), newPushedCondition, relBuilder);
     RelNode newProjRel = relBuilder.push(newPushedFilterRel)
-        .project(oldProj.getProjects(), oldProj.getRowType().getFieldNames()).build();
+        .project(newProjects, oldProj.getRowType().getFieldNames()).build();
 
     if (unPushedFilCondAboveProj != null) {
       // Remove cast of BOOLEAN NOT NULL to BOOLEAN or vice versa. Filter accepts
@@ -172,6 +176,56 @@ private static RelNode getNewProject(RexNode filterCondToPushBelowProj, RexNode
     return newProjRel;
   }
 
+  // Rewrite projects to replace column references by constants when possible
+  private static List<RexNode> rewriteProjects(List<RexNode> projects, RexNode newPushedCondition,
+      RelBuilder relBuilder) {
+    final List<RexNode> conjunctions = RelOptUtil.conjunctions(newPushedCondition);
+    final Map<String, RexNode> conditions = new HashMap<String, RexNode>();
+    for (RexNode conjunction: conjunctions) {
+      // 1.1. If it is not a RexCall, we continue
+      if (!(conjunction instanceof RexCall)) {
+        continue;
+      }
+      // 1.2. We extract the information that we need
+      RexCall conjCall = (RexCall) conjunction;
+      if(conjCall.getOperator().getKind() == SqlKind.EQUALS) {
+        if (!(RexUtil.isConstant(conjCall.operands.get(0))) &&
+            RexUtil.isConstant(conjCall.operands.get(1))) {
+          if (conditions.put(conjCall.operands.get(0).toString(),
+              conjCall.operands.get(1)) != null) {
+            // Several equality conditions on same column,
+            // just bail out and let other rules handle this
+            return projects;
+          }
+        } else if (!(RexUtil.isConstant(conjCall.operands.get(1))) &&
+            RexUtil.isConstant(conjCall.operands.get(0))) {
+          if (conditions.put(conjCall.operands.get(1).toString(),
+              conjCall.operands.get(0)) != null) {
+            // Several equality conditions on same column,
+            // just bail out and let other rules handle this
+            return projects;
+          }
+        }
+      }
+    }
+
+    List<RexNode> newProjects = new ArrayList<RexNode>(projects.size());
+    for (RexNode project : projects) {
+      RexNode newProject = conditions.get(project.toString());
+      if (newProject != null) {
+        if (newProject.getType().equals(project.getType())) {
+          newProjects.add(newProject);
+        } else {
+          newProjects.add(relBuilder.getRexBuilder().makeCast(
+              project.getType(), newProject, true));
+        }
+      } else {
+        newProjects.add(project);
+      }
+    }
+    return newProjects;
+  }
+
   private static Set<Integer> getCommonPartitionCols(List<RexNode> projections) {
     RexOver overClause;
     boolean firstOverClause = true;
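The rewriteProjects addition is easiest to see on concrete RexNodes: once a conjunct such as $0 = 5 has been pushed below the Project, any project expression whose string form matches $0 can become the literal. A sketch with stock Calcite builders (illustrative, not patch code):

    RelDataTypeFactory typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
    RexBuilder rexBuilder = new RexBuilder(typeFactory);
    RelDataType intType = typeFactory.createSqlType(SqlTypeName.INTEGER);
    RexNode ref = rexBuilder.makeInputRef(intType, 0);                          // $0
    RexNode lit = rexBuilder.makeExactLiteral(BigDecimal.valueOf(5), intType);  // 5
    RexNode cond = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, lit);   // $0 = 5
    // With cond as the pushed condition, rewriteProjects maps ref.toString()
    // ("$0") to lit, so Project[$0, $1] becomes Project[5, $1]; a cast is added
    // when the literal's type differs from the project expression's type.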
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java
index 2825f77..1ab33c6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
 
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
@@ -29,14 +30,19 @@
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.sql.SqlCollation;
 import org.apache.calcite.sql.SqlIntervalQualifier;
 import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.util.ConversionUtil;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
@@ -57,6 +63,9 @@
 import com.google.common.collect.Lists;
 
 public class TypeConverter {
+
+  protected static final Log LOG = LogFactory.getLog(TypeConverter.class);
+
   private static final Map<String, HiveToken> calciteToHiveTypeNameMap;
 
   // TODO: Handling of char[], varchar[], string...
@@ -162,7 +171,9 @@ public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtF
       convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE);
       break;
     case STRING:
-      convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE);
+      convertedType = dtFactory.createTypeWithCharsetAndCollation(
+          dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE),
+          Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
       break;
     case DATE:
       convertedType = dtFactory.createSqlType(SqlTypeName.DATE);
@@ -187,12 +198,14 @@ public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtF
           .createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale());
       break;
     case VARCHAR:
-      convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR,
-          ((BaseCharTypeInfo) type).getLength());
+      convertedType = dtFactory.createTypeWithCharsetAndCollation(
+          dtFactory.createSqlType(SqlTypeName.VARCHAR, ((BaseCharTypeInfo) type).getLength()),
+          Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
       break;
     case CHAR:
-      convertedType = dtFactory.createSqlType(SqlTypeName.CHAR,
-          ((BaseCharTypeInfo) type).getLength());
+      convertedType = dtFactory.createTypeWithCharsetAndCollation(
+          dtFactory.createSqlType(SqlTypeName.CHAR, ((BaseCharTypeInfo) type).getLength()),
+          Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
       break;
     case UNKNOWN:
       convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
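Context for the TypeConverter change, as far as one can tell from the hunk: Hive string types now carry an explicit UTF-16 charset and an implicit collation, presumably so that they unify with the types Calcite attaches to character literals when folding rules compare types. The equivalence being established, sketched with plain Calcite factories:

    RelDataTypeFactory dtFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
    RelDataType plain = dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE);
    RelDataType withCharset = dtFactory.createTypeWithCharsetAndCollation(
        plain, Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
    // withCharset, not plain, is the shape Calcite gives to string constants,
    // so type equality checks during expression reduction can succeed.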
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index b59347d..6471e4c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -139,6 +139,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregatePullUpConstantsRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule;
@@ -1162,6 +1163,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
     rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN);
     rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN);
     rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
+    rules.add(HiveAggregatePullUpConstantsRule.INSTANCE);
+    rules.add(HiveSortProjectTransposeRule.INSTANCE);
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
     basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
         rules.toArray(new RelOptRule[rules.size()]));
@@ -1213,8 +1216,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
 
     // 8. Merge Project-Project if possible
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, false, mdProvider, null, new ProjectMergeRule(true,
-        HiveRelFactories.HIVE_PROJECT_FACTORY));
+    basePlan = hepPlan(basePlan, false, mdProvider, null,
+        HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
       "Calcite: Prejoin ordering transformation, Merge Project-Project");
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index c6f8907..09371dd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -28,6 +28,7 @@
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
@@ -485,6 +486,25 @@ public static void getExprNodeColumnDesc(ExprNodeDesc exprDesc,
     }
   }
 
+  public static boolean isConstant(ExprNodeDesc value) {
+    if (value instanceof ExprNodeConstantDesc) {
+      return true;
+    }
+    if (value instanceof ExprNodeGenericFuncDesc) {
+      ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) value;
+      if (!FunctionRegistry.isDeterministic(func.getGenericUDF())) {
+        return false;
+      }
+      for (ExprNodeDesc child : func.getChildren()) {
+        if (!isConstant(child)) {
+          return false;
+        }
+      }
+      return true;
+    }
+    return false;
+  }
+
   public static boolean isAllConstants(List<ExprNodeDesc> value) {
     for (ExprNodeDesc expr : value) {
       if (!(expr instanceof ExprNodeConstantDesc)) {
@@ -641,4 +661,35 @@ public static ExprNodeColumnDesc getColumnExpr(ExprNodeDesc expr) {
     }
     return (expr instanceof ExprNodeColumnDesc) ? (ExprNodeColumnDesc)expr : null;
   }
+
+  // Find the constant origin of a certain column if it is originated from a constant
+  // Otherwise, it returns the expression that originated the column
+  public static ExprNodeDesc findConstantExprOrigin(String dpCol, Operator<? extends OperatorDesc> op) {
+    ExprNodeDesc expr = op.getColumnExprMap().get(dpCol);
+    ExprNodeDesc foldedExpr;
+    // If it is a function, we try to fold it
+    if (expr instanceof ExprNodeGenericFuncDesc) {
+      foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)expr);
+      if (foldedExpr == null) {
+        foldedExpr = expr;
+      }
+    } else {
+      foldedExpr = expr;
+    }
+    // If it is a column reference, we will try to resolve it
+    if (foldedExpr instanceof ExprNodeColumnDesc) {
+      Operator<? extends OperatorDesc> originOp = null;
+      for(Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) {
+        if (parentOp.getColumnExprMap() != null) {
+          originOp = parentOp;
+          break;
+        }
+      }
+      if (originOp != null) {
+        return findConstantExprOrigin(((ExprNodeColumnDesc)foldedExpr).getColumn(), originOp);
+      }
+    }
+    // Otherwise, we return the expression
+    return foldedExpr;
+  }
 }
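A closing note on the isConstant helper added above: determinism is part of the check, so an expression is non-constant whenever its UDF is non-deterministic, even if it has no variable inputs. Sketch (not patch code; newInstance exception handling elided, and the rand() bridge construction is illustrative):

    // (1 + 2): deterministic over constants, so isConstant(...) is true.
    ExprNodeDesc sum = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPPlus(),
        Lists.newArrayList((ExprNodeDesc) new ExprNodeConstantDesc(1), new ExprNodeConstantDesc(2)));
    // rand(): non-deterministic, so isConstant(...) is false.
    ExprNodeDesc random = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFBridge("rand", false, UDFRand.class.getName()),
        new ArrayList<ExprNodeDesc>());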
diff --git ql/src/test/queries/clientpositive/join_view.q ql/src/test/queries/clientpositive/join_view.q
index 16b6816..69c96be 100644
--- ql/src/test/queries/clientpositive/join_view.q
+++ ql/src/test/queries/clientpositive/join_view.q
@@ -3,8 +3,6 @@
 drop table invites2;
 create table invites (foo int, bar string) partitioned by (ds string);
 create table invites2 (foo int, bar string) partitioned by (ds string);
-set hive.mapred.mode=strict;
-
 -- test join views: see HIVE-1989
 create view v as select invites.bar, invites2.foo, invites2.ds
 from invites join invites2 on invites.ds=invites2.ds;
@@ -13,4 +11,4 @@
 explain select * from v where ds='2011-09-01';
 
 drop view v;
 drop table invites;
-drop table invites2;
\ No newline at end of file
+drop table invites2;