diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 6d1e85b..298855a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -34,11 +34,14 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexVisitor; +import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; +import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.parse.ASTNode; @@ -78,7 +81,7 @@ public static boolean validateASTForUnsupportedTokens(ASTNode ast) { String astTree = ast.toStringTree(); // if any of following tokens are present in AST, bail out - String[] tokens = { "TOK_CHARSETLITERAL","TOK_TABLESPLITSAMPLE" }; + String[] tokens = { "TOK_CHARSETLITERAL", "TOK_TABLESPLITSAMPLE" }; for (String token : tokens) { if (astTree.contains(token)) { return false; @@ -505,8 +508,8 @@ public static boolean orderRelNode(RelNode rel) { /** * Get top level select starting from root. Assumption here is root can only - * be Sort & Project. Also the top project should be at most 2 levels - * below Sort; i.e Sort(Limit)-Sort(OB)-Select + * be Sort & Project. Also the top project should be at most 2 levels below + * Sort; i.e Sort(Limit)-Sort(OB)-Select * * @param rootRel * @return @@ -527,4 +530,25 @@ public static boolean orderRelNode(RelNode rel) { return (new Pair(parentOforiginalProjRel, originalProjRel)); } + + public static boolean isDeterministic(RexNode expr) { + boolean deterministic = true; + + RexVisitor visitor = new RexVisitorImpl(true) { + public Void visitCall(org.apache.calcite.rex.RexCall call) { + if (!call.getOperator().isDeterministic()) { + throw new Util.FoundOne(call); + } + return super.visitCall(call); + } + }; + + try { + expr.accept(visitor); + } catch (Util.FoundOne e) { + deterministic = false; + } + + return deterministic; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java index dcaf831..86ddaa6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java @@ -24,6 +24,7 @@ import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelOptUtil.InputFinder; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; @@ -34,6 +35,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; @@ -57,13 +59,21 @@ protected HiveFilterJoinRule(RelOptRuleOperand operand, String id, boolean smart */ public static class HiveFilterJoinMergeRule extends HiveFilterJoinRule { public HiveFilterJoinMergeRule() { - super(RelOptRule.operand(Filter.class, - RelOptRule.operand(Join.class, RelOptRule.any())), + super(RelOptRule.operand(Filter.class, RelOptRule.operand(Join.class, RelOptRule.any())), "HiveFilterJoinRule:filter", true, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.DEFAULT_PROJECT_FACTORY); } @Override + public boolean matches(RelOptRuleCall call) { + Filter filter = call.rel(0); + if (!HiveCalciteUtil.isDeterministic(filter.getCondition())) { + return false; + } + return true; + } + + @Override public void onMatch(RelOptRuleCall call) { Filter filter = call.rel(0); Join join = call.rel(1); @@ -73,9 +83,22 @@ public void onMatch(RelOptRuleCall call) { public static class HiveFilterJoinTransposeRule extends HiveFilterJoinRule { public HiveFilterJoinTransposeRule() { - super(RelOptRule.operand(Join.class, RelOptRule.any()), - "HiveFilterJoinRule:no-filter", true, HiveFilter.DEFAULT_FILTER_FACTORY, - HiveProject.DEFAULT_PROJECT_FACTORY); + super(RelOptRule.operand(Join.class, RelOptRule.any()), "HiveFilterJoinRule:no-filter", true, + HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.DEFAULT_PROJECT_FACTORY); + } + + @Override + public boolean matches(RelOptRuleCall call) { + Join join = call.rel(0); + List joinConds = RelOptUtil.conjunctions(join.getCondition()); + + for (RexNode joinCnd : joinConds) { + if (!HiveCalciteUtil.isDeterministic(joinCnd)) { + return false; + } + } + + return true; } @Override @@ -140,12 +163,11 @@ private boolean filterRefersToBothSidesOfJoin(RexNode filter, Join j) { boolean refersToBothSides = false; int joinNoOfProjects = j.getRowType().getFieldCount(); - ImmutableBitSet filterProjs = ImmutableBitSet.FROM_BIT_SET.apply( - new BitSet(joinNoOfProjects)); - ImmutableBitSet allLeftProjs = filterProjs.union( - ImmutableBitSet.range(0, j.getInput(0).getRowType().getFieldCount())); - ImmutableBitSet allRightProjs = filterProjs.union( - ImmutableBitSet.range(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects)); + ImmutableBitSet filterProjs = ImmutableBitSet.FROM_BIT_SET.apply(new BitSet(joinNoOfProjects)); + ImmutableBitSet allLeftProjs = filterProjs.union(ImmutableBitSet.range(0, j.getInput(0) + .getRowType().getFieldCount())); + ImmutableBitSet allRightProjs = filterProjs.union(ImmutableBitSet.range(j.getInput(0) + .getRowType().getFieldCount(), joinNoOfProjects)); filterProjs = filterProjs.union(InputFinder.bits(filter)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java new file mode 100644 index 0000000..7e484b9 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.core.RelFactories.ProjectFactory; +import org.apache.calcite.rel.rules.FilterProjectTransposeRule; +import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; + +public class HiveFilterProjectTransposeRule extends FilterProjectTransposeRule { + + public HiveFilterProjectTransposeRule(Class filterClass, + FilterFactory filterFactory, Class projectClass, + ProjectFactory projectFactory) { + super(filterClass, filterFactory, projectClass, projectFactory); + } + + @Override + public boolean matches(RelOptRuleCall call) { + final Filter filterRel = call.rel(0); + RexNode condition = filterRel.getCondition(); + if (!HiveCalciteUtil.isDeterministic(condition)) { + return false; + } + + return super.matches(call); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java new file mode 100644 index 0000000..5c16d8f --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.rules.FilterSetOpTransposeRule; +import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; + +public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule { + + public HiveFilterSetOpTransposeRule(FilterFactory filterFactory) { + super(filterFactory); + } + + @Override + public boolean matches(RelOptRuleCall call) { + Filter filterRel = call.rel(0); + RexNode condition = filterRel.getCondition(); + if (!HiveCalciteUtil.isDeterministic(condition)) { + return false; + } + + return super.matches(call); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 6a4150c..7915b80 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -78,7 +78,8 @@ } public static SqlOperator getCalciteOperator(String funcTextName, GenericUDF hiveUDF, - ImmutableList calciteArgTypes, RelDataType retType) throws CalciteSemanticException { + ImmutableList calciteArgTypes, RelDataType retType) + throws CalciteSemanticException { // handle overloaded methods first if (hiveUDF instanceof GenericUDFOPNegative) { return SqlStdOperatorTable.UNARY_MINUS; @@ -87,15 +88,17 @@ public static SqlOperator getCalciteOperator(String funcTextName, GenericUDF hiv } // do generic lookup String name = null; if (StringUtils.isEmpty(funcTextName)) { - name = getName(hiveUDF); // this should probably never happen, see getName - // comment + name = getName(hiveUDF); // this should probably never happen, see + // getName + // comment LOG.warn("The function text was empty, name from annotation is " + name); } else { - // We could just do toLowerCase here and let SA qualify it, but let's be + // We could just do toLowerCase here and let SA qualify it, but + // let's be // proper... name = FunctionRegistry.getNormalizedFunctionName(funcTextName); } - return getCalciteFn(name, calciteArgTypes, retType); + return getCalciteFn(name, calciteArgTypes, retType, FunctionRegistry.isDeterministic(hiveUDF)); } public static GenericUDF getHiveUDF(SqlOperator op, RelDataType dt, int argsLength) { @@ -129,7 +132,8 @@ public static GenericUDF getHiveUDF(SqlOperator op, RelDataType dt, int argsLeng return hFn == null ? null : hFn.getGenericUDF(); } - private static FunctionInfo handleExplicitCast(SqlOperator op, RelDataType dt) throws SemanticException { + private static FunctionInfo handleExplicitCast(SqlOperator op, RelDataType dt) + throws SemanticException { FunctionInfo castUDF = null; if (op.kind == SqlKind.CAST) { @@ -158,7 +162,7 @@ private static FunctionInfo handleExplicitCast(SqlOperator op, RelDataType dt) t castUDF = FunctionRegistry.getFunctionInfo("double"); } else if (castType.equals(TypeInfoFactory.timestampTypeInfo)) { castUDF = FunctionRegistry.getFunctionInfo("timestamp"); - } else if (castType.equals(TypeInfoFactory.dateTypeInfo)) { + } else if (castType.equals(TypeInfoFactory.dateTypeInfo)) { castUDF = FunctionRegistry.getFunctionInfo("date"); } else if (castType instanceof DecimalTypeInfo) { castUDF = handleCastForParameterizedType(castType, @@ -182,7 +186,8 @@ private static FunctionInfo handleCastForParameterizedType(TypeInfo ti, Function return new FunctionInfo(fi.isNative(), fi.getDisplayName(), (GenericUDF) udf); } - // TODO: 1) handle Agg Func Name translation 2) is it correct to add func args + // TODO: 1) handle Agg Func Name translation 2) is it correct to add func + // args // as child of func? public static ASTNode buildAST(SqlOperator op, List children) { HiveToken hToken = calciteToHiveToken.get(op); @@ -231,11 +236,14 @@ public static ASTNode buildAST(SqlOperator op, List children, int i) { } - // TODO: this is not valid. Function names for built-in UDFs are specified in + // TODO: this is not valid. Function names for built-in UDFs are specified + // in // FunctionRegistry, - // and only happen to match annotations. For user UDFs, the name is what user + // and only happen to match annotations. For user UDFs, the name is what + // user // specifies at - // creation time (annotation can be absent, different, or duplicate some other + // creation time (annotation can be absent, different, or duplicate some + // other // function). private static String getName(GenericUDF hiveUDF) { String udfName = null; @@ -268,10 +276,12 @@ private static String getName(GenericUDF hiveUDF) { return udfName; } - /** This class is used to build immutable hashmaps in the static block above. */ + /** + * This class is used to build immutable hashmaps in the static block above. + */ private static class StaticBlockBuilder { - final Map hiveToCalcite = Maps.newHashMap(); - final Map calciteToHiveToken = Maps.newHashMap(); + final Map hiveToCalcite = Maps.newHashMap(); + final Map calciteToHiveToken = Maps.newHashMap(); final Map reverseOperatorMap = Maps.newHashMap(); StaticBlockBuilder() { @@ -317,6 +327,7 @@ private static HiveToken hToken(int type, String text) { return new HiveToken(type, text); } + // UDAF is assumed to be deterministic public static class CalciteUDAF extends SqlAggFunction { public CalciteUDAF(String opName, SqlReturnTypeInference returnTypeInference, SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker, @@ -326,6 +337,22 @@ public CalciteUDAF(String opName, SqlReturnTypeInference returnTypeInference, } } + private static class CalciteSqlFn extends SqlFunction { + private final boolean deterministic; + + public CalciteSqlFn(String name, SqlKind kind, SqlReturnTypeInference returnTypeInference, + SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker, + SqlFunctionCategory category, boolean deterministic) { + super(name, kind, returnTypeInference, operandTypeInference, operandTypeChecker, category); + this.deterministic = deterministic; + } + + @Override + public boolean isDeterministic() { + return deterministic; + } + } + private static class CalciteUDFInfo { private String udfName; private SqlReturnTypeInference returnTypeInference; @@ -354,12 +381,14 @@ private static CalciteUDFInfo getUDFInfo(String hiveUdfName, } public static SqlOperator getCalciteFn(String hiveUdfName, - ImmutableList calciteArgTypes, RelDataType calciteRetType) + ImmutableList calciteArgTypes, RelDataType calciteRetType, boolean deterministic) throws CalciteSemanticException { if (hiveUdfName != null && hiveUdfName.trim().equals("<=>")) { - // We can create Calcite IS_DISTINCT_FROM operator for this. But since our - // join reordering algo cant handle this anyway there is no advantage of + // We can create Calcite IS_DISTINCT_FROM operator for this. But + // since our + // join reordering algo cant handle this anyway there is no + // advantage of // this. // So, bail out for now. throw new CalciteSemanticException("<=> is not yet supported for cbo."); @@ -367,9 +396,9 @@ public static SqlOperator getCalciteFn(String hiveUdfName, SqlOperator calciteOp = hiveToCalcite.get(hiveUdfName); if (calciteOp == null) { CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType); - calciteOp = new SqlFunction(uInf.udfName, SqlKind.OTHER_FUNCTION, uInf.returnTypeInference, + calciteOp = new CalciteSqlFn(uInf.udfName, SqlKind.OTHER_FUNCTION, uInf.returnTypeInference, uInf.operandTypeInference, uInf.operandTypeChecker, - SqlFunctionCategory.USER_DEFINED_FUNCTION); + SqlFunctionCategory.USER_DEFINED_FUNCTION, deterministic); } return calciteOp; @@ -381,8 +410,8 @@ public static SqlAggFunction getCalciteAggFn(String hiveUdfName, if (calciteAggFn == null) { CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType); - calciteAggFn = new CalciteUDAF(uInf.udfName, uInf.returnTypeInference, uInf.operandTypeInference, - uInf.operandTypeChecker, uInf.argTypes, uInf.retType); + calciteAggFn = new CalciteUDAF(uInf.udfName, uInf.returnTypeInference, + uInf.operandTypeInference, uInf.operandTypeChecker, uInf.argTypes, uInf.retType); } return calciteAggFn; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 7aede8c..cef2be9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -194,6 +194,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory; @@ -12673,9 +12675,9 @@ public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdPro SemiJoinProjectTransposeRule.INSTANCE); basePlan = hepPlan(basePlan, true, mdProvider, - new FilterProjectTransposeRule( + new HiveFilterProjectTransposeRule( Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, - HiveProject.DEFAULT_PROJECT_FACTORY), new FilterSetOpTransposeRule( + HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( HiveFilter.DEFAULT_FILTER_FACTORY), new FilterMergeRule( HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, HiveFilterJoinRule.FILTER_ON_JOIN,