diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 024097e..0200506 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -339,25 +339,25 @@ public JoinPredicateInfo(List nonEquiJoinPredicateElement return this.mapOfProjIndxInJoinSchemaToLeafPInfo; } - public static JoinPredicateInfo constructJoinPredicateInfo(Join j) { + public static JoinPredicateInfo constructJoinPredicateInfo(Join j) throws CalciteSemanticException { return constructJoinPredicateInfo(j, j.getCondition()); } - public static JoinPredicateInfo constructJoinPredicateInfo(HiveMultiJoin mj) { + public static JoinPredicateInfo constructJoinPredicateInfo(HiveMultiJoin mj) throws CalciteSemanticException { return constructJoinPredicateInfo(mj, mj.getCondition()); } - public static JoinPredicateInfo constructJoinPredicateInfo(Join j, RexNode predicate) { + public static JoinPredicateInfo constructJoinPredicateInfo(Join j, RexNode predicate) throws CalciteSemanticException { return constructJoinPredicateInfo(j.getInputs(), j.getSystemFieldList(), predicate); } - public static JoinPredicateInfo constructJoinPredicateInfo(HiveMultiJoin mj, RexNode predicate) { + public static JoinPredicateInfo constructJoinPredicateInfo(HiveMultiJoin mj, RexNode predicate) throws CalciteSemanticException { final List systemFieldList = ImmutableList.of(); return constructJoinPredicateInfo(mj.getInputs(), systemFieldList, predicate); } public static JoinPredicateInfo constructJoinPredicateInfo(List inputs, - List systemFieldList, RexNode predicate) { + List systemFieldList, RexNode predicate) throws CalciteSemanticException { JoinPredicateInfo jpi = null; JoinLeafPredicateInfo jlpi = null; List equiLPIList = new ArrayList(); @@ -504,7 +504,7 @@ public JoinLeafPredicateInfo( // split accordingly. If the join condition is not part of the equi-join predicate, // the returned object will be typed as SQLKind.OTHER. private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(List inputs, - List systemFieldList, RexNode pe) { + List systemFieldList, RexNode pe) throws CalciteSemanticException { JoinLeafPredicateInfo jlpi = null; List filterNulls = new ArrayList(); List> joinExprs = new ArrayList>(); @@ -513,7 +513,7 @@ private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(List getInputRef(List inputRefs, RelNode inputRel) { ImmutableList.Builder bldr = ImmutableList. builder(); for (int i : inputRefs) { - bldr.add(new RexInputRef(i, (RelDataType) inputRel.getRowType().getFieldList().get(i).getType())); + bldr.add(new RexInputRef(i, inputRel.getRowType().getFieldList().get(i).getType())); } return bldr.build(); } @@ -697,7 +697,7 @@ public Void visitCall(org.apache.calcite.rex.RexCall call) { public static ExprNodeDesc getExprNode(Integer inputRefIndx, RelNode inputRel, ExprNodeConverter exprConv) { ExprNodeDesc exprNode = null; - RexNode rexInputRef = new RexInputRef(inputRefIndx, (RelDataType) inputRel.getRowType() + RexNode rexInputRef = new RexInputRef(inputRefIndx, inputRel.getRowType() .getFieldList().get(inputRefIndx).getType()); exprNode = rexInputRef.accept(exprConv); @@ -723,9 +723,9 @@ public static ExprNodeDesc getExprNode(Integer inputRefIndx, RelNode inputRel, for (Integer iRef : inputRefs) { fieldNames.add(schemaNames.get(iRef)); } - + return fieldNames; - } + } /** * Walks over an expression and determines whether it is constant. @@ -789,12 +789,13 @@ public Boolean visitFieldAccess(RexFieldAccess fieldAccess) { private static class InputRefsCollector extends RexVisitorImpl { - private Set inputRefSet = new HashSet(); + private final Set inputRefSet = new HashSet(); private InputRefsCollector(boolean deep) { super(deep); } + @Override public Void visitInputRef(RexInputRef inputRef) { inputRefSet.add(inputRef.getIndex()); return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java index 9ebb24f..ab793f1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java @@ -7,7 +7,6 @@ import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; @@ -17,11 +16,13 @@ import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.util.ImmutableBitSet; -import org.apache.calcite.util.Util; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import com.google.common.collect.ImmutableList; public class HiveRelOptUtil extends RelOptUtil { @@ -48,14 +49,15 @@ * join predicate are at the end of the key lists * returned * @return What's left, never null + * @throws CalciteSemanticException */ - public static RexNode splitJoinCondition( + public static RexNode splitHiveJoinCondition( List sysFieldList, List inputs, RexNode condition, List> joinKeys, List filterNulls, - List rangeOp) { + List rangeOp) throws CalciteSemanticException { final List nonEquiList = new ArrayList<>(); splitJoinCondition( @@ -79,11 +81,10 @@ private static void splitJoinCondition( List> joinKeys, List filterNulls, List rangeOp, - List nonEquiList) { + List nonEquiList) throws CalciteSemanticException { final int sysFieldCount = sysFieldList.size(); final RelOptCluster cluster = inputs.get(0).getCluster(); final RexBuilder rexBuilder = cluster.getRexBuilder(); - final RelDataTypeFactory typeFactory = cluster.getTypeFactory(); final ImmutableBitSet[] inputsRange = new ImmutableBitSet[inputs.size()]; int totalFieldCount = 0; @@ -199,24 +200,25 @@ private static void splitJoinCondition( RelDataType rightKeyType = rightKey.getType(); if (leftKeyType != rightKeyType) { - // perform casting - RelDataType targetKeyType = - typeFactory.leastRestrictive( - ImmutableList.of(leftKeyType, rightKeyType)); + // perform casting using Hive rules + TypeInfo rType = TypeConverter.convert(rightKeyType); + TypeInfo lType = TypeConverter.convert(leftKeyType); + TypeInfo tgtType = FunctionRegistry.getCommonClassForComparison(lType, rType); - if (targetKeyType == null) { - throw Util.newInternal( + if (tgtType == null) { + throw new CalciteSemanticException( "Cannot find common type for join keys " - + leftKey + " (type " + leftKeyType + ") and " - + rightKey + " (type " + rightKeyType + ")"); + + leftKey + " (type " + leftKeyType + ") and " + + rightKey + " (type " + rightKeyType + ")"); } + RelDataType targetKeyType = TypeConverter.convert(tgtType, rexBuilder.getTypeFactory()); - if (leftKeyType != targetKeyType) { + if (leftKeyType != targetKeyType && TypeInfoUtils.isConversionRequiredForComparison(tgtType, lType)) { leftKey = rexBuilder.makeCast(targetKeyType, leftKey); } - if (rightKeyType != targetKeyType) { + if (rightKeyType != targetKeyType && TypeInfoUtils.isConversionRequiredForComparison(tgtType, rType)) { rightKey = rexBuilder.makeCast(targetKeyType, rightKey); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java index c5e0e11..fb7c1cd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java @@ -22,7 +22,6 @@ import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; @@ -35,6 +34,7 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; @@ -142,8 +142,13 @@ private static RelNode mergeJoin(Join join, RelNode left, RelNode right) { leftJoinTypes = hmj.getJoinTypes(); } - boolean combinable = isCombinablePredicate(join, join.getCondition(), - leftCondition); + boolean combinable; + try { + combinable = isCombinablePredicate(join, join.getCondition(), + leftCondition); + } catch (CalciteSemanticException e) { + combinable = false; + } if (combinable) { newJoinFilters.add(leftCondition); for (int i = 0; i < leftJoinInputs.size(); i++) { @@ -172,8 +177,13 @@ private static RelNode mergeJoin(Join join, RelNode left, RelNode right) { for (int i=0; i()); } - RexNode otherCondition = HiveRelOptUtil.splitJoinCondition(systemFieldList, newInputs, join.getCondition(), - joinKeyExprs, filterNulls, null); + RexNode otherCondition; + try { + otherCondition = HiveRelOptUtil.splitHiveJoinCondition(systemFieldList, newInputs, join.getCondition(), + joinKeyExprs, filterNulls, null); + } catch (CalciteSemanticException e) { + return null; + } // If there are remaining parts in the condition, we bail out if (!otherCondition.isAlwaysTrue()) { return null; @@ -221,7 +231,7 @@ private static RelNode mergeJoin(Join join, RelNode left, RelNode right) { } private static boolean isCombinablePredicate(Join join, - RexNode condition, RexNode otherCondition) { + RexNode condition, RexNode otherCondition) throws CalciteSemanticException { final JoinPredicateInfo joinPredInfo = HiveCalciteUtil.JoinPredicateInfo. constructJoinPredicateInfo(join, condition); final JoinPredicateInfo otherJoinPredInfo = HiveCalciteUtil.JoinPredicateInfo. @@ -236,41 +246,4 @@ private static boolean isCombinablePredicate(Join join, } return true; } - - /** - * Shifts a filter originating from the right child of the LogicalJoin to the - * right, to reflect the filter now being applied on the resulting - * MultiJoin. - * - * @param joinRel the original LogicalJoin - * @param left the left child of the LogicalJoin - * @param right the right child of the LogicalJoin - * @param rightFilter the filter originating from the right child - * @return the adjusted right filter - */ - private static RexNode shiftRightFilter( - Join joinRel, - RelNode left, - RelNode right, - RexNode rightFilter) { - if (rightFilter == null) { - return null; - } - - int nFieldsOnLeft = left.getRowType().getFieldList().size(); - int nFieldsOnRight = right.getRowType().getFieldList().size(); - int[] adjustments = new int[nFieldsOnRight]; - for (int i = 0; i < nFieldsOnRight; i++) { - adjustments[i] = nFieldsOnLeft; - } - rightFilter = - rightFilter.accept( - new RelOptUtil.RexInputConverter( - joinRel.getCluster().getRexBuilder(), - right.getRowType().getFieldList(), - joinRel.getRowType().getFieldList(), - adjustments)); - return rightFilter; - } - } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index 960ec40..715f24f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -32,6 +32,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; @@ -57,14 +58,14 @@ public Double getSelectivity(HiveTableScan t, RexNode predicate) { return 1.0; } - public Double getSelectivity(HiveJoin j, RexNode predicate) { + public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException { if (j.getJoinType().equals(JoinRelType.INNER)) { return computeInnerJoinSelectivity(j, predicate); } return 1.0; } - private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) { + private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException { double ndvCrossProduct = 1; Pair predInfo = getCombinedPredicateForJoin(j, predicate); @@ -183,7 +184,7 @@ protected double exponentialBackoff(List peLst, } /** - * + * * @param j * @param additionalPredicate * @return if predicate is the join condition return (true, joinCond) @@ -206,7 +207,7 @@ protected double exponentialBackoff(List peLst, /** * Compute Max NDV to determine Join Selectivity. - * + * * @param jlpi * @param colStatMap * Immutable Map of Projection Index (in Join Schema) to Column Stat @@ -238,5 +239,5 @@ private static Double getMaxNDVFromProjections(Map colStatMap, return maxNDVSoFar; } - + }