diff --git pom.xml pom.xml index 688a12f..c07f692 100644 --- pom.xml +++ pom.xml @@ -99,7 +99,7 @@ 3.4 1.7.5 0.8.0.RELEASE - 0.9.2-incubating + 1.0.0-incubating-SNAPSHOT 3.2.6 3.2.10 3.2.9 diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java new file mode 100644 index 0000000..a71cd35 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +/** + * Exception from SemanticAnalyzer. + */ + +public class CalciteSemanticException extends SemanticException { + + private static final long serialVersionUID = 1L; + + public CalciteSemanticException() { + super(); + } + + public CalciteSemanticException(String message) { + super(message); + } + + public CalciteSemanticException(Throwable cause) { + super(cause); + } + + public CalciteSemanticException(String message, Throwable cause) { + super(message, cause); + } + + public CalciteSemanticException(ErrorMsg errorMsg, String... msgArgs) { + super(errorMsg, msgArgs); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java new file mode 100644 index 0000000..6d1e85b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -0,0 +1,530 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.RelFactories.ProjectFactory; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.validate.SqlValidatorUtil; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.parse.ASTNode; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +/** + * Generic utility functions needed for Calcite based Hive CBO. + */ + +public class HiveCalciteUtil { + + /** + * Get list of virtual columns from the given list of projections. + *
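+ * For example, given the projection list (c1, c1 + c2, c3), only index 1 is
+ * returned: c1 and c3 are plain RexInputRef column references, while c1 + c2
+ * is a computed expression and therefore a virtual column.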

+ * + * @param exps + * list of rex nodes representing projections + * @return List of Virtual Columns, will not be null. + */ + public static List getVirtualCols(List exps) { + List vCols = new ArrayList(); + + for (int i = 0; i < exps.size(); i++) { + if (!(exps.get(i) instanceof RexInputRef)) { + vCols.add(i); + } + } + + return vCols; + } + + public static boolean validateASTForUnsupportedTokens(ASTNode ast) { + String astTree = ast.toStringTree(); + // if any of following tokens are present in AST, bail out + String[] tokens = { "TOK_CHARSETLITERAL","TOK_TABLESPLITSAMPLE" }; + for (String token : tokens) { + if (astTree.contains(token)) { + return false; + } + } + return true; + } + + public static List getProjsFromBelowAsInputRef(final RelNode rel) { + List projectList = Lists.transform(rel.getRowType().getFieldList(), + new Function() { + @Override + public RexNode apply(RelDataTypeField field) { + return rel.getCluster().getRexBuilder().makeInputRef(field.getType(), field.getIndex()); + } + }); + return projectList; + } + + public static List translateBitSetToProjIndx(ImmutableBitSet projBitSet) { + List projIndxLst = new ArrayList(); + + for (int i = 0; i < projBitSet.length(); i++) { + if (projBitSet.get(i)) { + projIndxLst.add(i); + } + } + + return projIndxLst; + } + + /** + * Push any equi join conditions that are not column references as Projections + * on top of the children. + * + * @param factory + * Project factory to use. + * @param inputRels + * inputs to a join + * @param leftJoinKeys + * expressions for LHS of join key + * @param rightJoinKeys + * expressions for RHS of join key + * @param systemColCount + * number of system columns, usually zero. These columns are + * projected at the leading edge of the output row. + * @param leftKeys + * on return this contains the join key positions from the new + * project rel on the LHS. + * @param rightKeys + * on return this contains the join key positions from the new + * project rel on the RHS. + * @return the join condition after the equi expressions pushed down. 
+ */ + public static RexNode projectNonColumnEquiConditions(ProjectFactory factory, RelNode[] inputRels, + List leftJoinKeys, List rightJoinKeys, int systemColCount, + List leftKeys, List rightKeys) { + RelNode leftRel = inputRels[0]; + RelNode rightRel = inputRels[1]; + RexBuilder rexBuilder = leftRel.getCluster().getRexBuilder(); + RexNode outJoinCond = null; + + int origLeftInputSize = leftRel.getRowType().getFieldCount(); + int origRightInputSize = rightRel.getRowType().getFieldCount(); + + List newLeftFields = new ArrayList(); + List newLeftFieldNames = new ArrayList(); + + List newRightFields = new ArrayList(); + List newRightFieldNames = new ArrayList(); + int leftKeyCount = leftJoinKeys.size(); + int i; + + for (i = 0; i < origLeftInputSize; i++) { + final RelDataTypeField field = leftRel.getRowType().getFieldList().get(i); + newLeftFields.add(rexBuilder.makeInputRef(field.getType(), i)); + newLeftFieldNames.add(field.getName()); + } + + for (i = 0; i < origRightInputSize; i++) { + final RelDataTypeField field = rightRel.getRowType().getFieldList().get(i); + newRightFields.add(rexBuilder.makeInputRef(field.getType(), i)); + newRightFieldNames.add(field.getName()); + } + + int newKeyCount = 0; + List> origColEqConds = new ArrayList>(); + for (i = 0; i < leftKeyCount; i++) { + RexNode leftKey = leftJoinKeys.get(i); + RexNode rightKey = rightJoinKeys.get(i); + + if (leftKey instanceof RexInputRef && rightKey instanceof RexInputRef) { + origColEqConds.add(Pair.of(((RexInputRef) leftKey).getIndex(), + ((RexInputRef) rightKey).getIndex())); + } else { + newLeftFields.add(leftKey); + newLeftFieldNames.add(null); + newRightFields.add(rightKey); + newRightFieldNames.add(null); + newKeyCount++; + } + } + + for (i = 0; i < origColEqConds.size(); i++) { + Pair p = origColEqConds.get(i); + RexNode leftKey = leftJoinKeys.get(i); + RexNode rightKey = rightJoinKeys.get(i); + leftKeys.add(p.left); + rightKeys.add(p.right); + RexNode cond = rexBuilder.makeCall( + SqlStdOperatorTable.EQUALS, + rexBuilder.makeInputRef(leftKey.getType(), systemColCount + p.left), + rexBuilder.makeInputRef(rightKey.getType(), systemColCount + origLeftInputSize + + newKeyCount + p.right)); + if (outJoinCond == null) { + outJoinCond = cond; + } else { + outJoinCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, outJoinCond, cond); + } + } + + if (newKeyCount == 0) { + return outJoinCond; + } + + int newLeftOffset = systemColCount + origLeftInputSize; + int newRightOffset = systemColCount + origLeftInputSize + origRightInputSize + newKeyCount; + for (i = 0; i < newKeyCount; i++) { + leftKeys.add(origLeftInputSize + i); + rightKeys.add(origRightInputSize + i); + RexNode cond = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + rexBuilder.makeInputRef(newLeftFields.get(i).getType(), newLeftOffset + i), + rexBuilder.makeInputRef(newLeftFields.get(i).getType(), newRightOffset + i)); + if (outJoinCond == null) { + outJoinCond = cond; + } else { + outJoinCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, outJoinCond, cond); + } + } + + // added project if need to produce new keys than the original input + // fields + if (newKeyCount > 0) { + leftRel = factory.createProject(leftRel, newLeftFields, + SqlValidatorUtil.uniquify(newLeftFieldNames)); + rightRel = factory.createProject(rightRel, newRightFields, + SqlValidatorUtil.uniquify(newRightFieldNames)); + } + + inputRels[0] = leftRel; + inputRels[1] = rightRel; + + return outJoinCond; + } + + /** + * JoinPredicateInfo represents Join condition; JoinPredicate Info uses + * 
JoinLeafPredicateInfo to represent individual conjunctive elements in the + * predicate.
+ * JoinPredicateInfo = JoinLeafPredicateInfo1 and JoinLeafPredicateInfo2...
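+ * For example, the condition (l.a = r.b AND l.c < r.d) yields two leaf
+ * elements: l.a = r.b goes into equiJoinPredicateElements and l.c < r.d
+ * into nonEquiJoinPredicateElements.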
+ *

+ * JoinPredicateInfo:
+ * 1. Preserves the order of conjunctive elements for + * equi-join (equiJoinPredicateElements).
+ * 2. Stores the set of projection indexes from the left and right child that are + * part of the equi-join keys; the indexes are given in both the child and the Join node schema.
+ * 3. Keeps a map of projection indexes that are part of join keys to list of + * conjuctive elements(JoinLeafPredicateInfo) that uses them. + * + */ + public static class JoinPredicateInfo { + private final ImmutableList nonEquiJoinPredicateElements; + private final ImmutableList equiJoinPredicateElements; + private final ImmutableSet projsFromLeftPartOfJoinKeysInChildSchema; + private final ImmutableSet projsFromRightPartOfJoinKeysInChildSchema; + private final ImmutableSet projsFromRightPartOfJoinKeysInJoinSchema; + private final ImmutableMap> mapOfProjIndxInJoinSchemaToLeafPInfo; + + public JoinPredicateInfo(List nonEquiJoinPredicateElements, + List equiJoinPredicateElements, + Set projsFromLeftPartOfJoinKeysInChildSchema, + Set projsFromRightPartOfJoinKeysInChildSchema, + Set projsFromRightPartOfJoinKeysInJoinSchema, + Map> mapOfProjIndxInJoinSchemaToLeafPInfo) { + this.nonEquiJoinPredicateElements = ImmutableList.copyOf(nonEquiJoinPredicateElements); + this.equiJoinPredicateElements = ImmutableList.copyOf(equiJoinPredicateElements); + this.projsFromLeftPartOfJoinKeysInChildSchema = ImmutableSet + .copyOf(projsFromLeftPartOfJoinKeysInChildSchema); + this.projsFromRightPartOfJoinKeysInChildSchema = ImmutableSet + .copyOf(projsFromRightPartOfJoinKeysInChildSchema); + this.projsFromRightPartOfJoinKeysInJoinSchema = ImmutableSet + .copyOf(projsFromRightPartOfJoinKeysInJoinSchema); + this.mapOfProjIndxInJoinSchemaToLeafPInfo = ImmutableMap + .copyOf(mapOfProjIndxInJoinSchemaToLeafPInfo); + } + + public List getNonEquiJoinPredicateElements() { + return this.nonEquiJoinPredicateElements; + } + + public List getEquiJoinPredicateElements() { + return this.equiJoinPredicateElements; + } + + public Set getProjsFromLeftPartOfJoinKeysInChildSchema() { + return this.projsFromLeftPartOfJoinKeysInChildSchema; + } + + public Set getProjsFromRightPartOfJoinKeysInChildSchema() { + return this.projsFromRightPartOfJoinKeysInChildSchema; + } + + /** + * NOTE: Join Schema = left Schema + (right Schema offset by + * left.fieldcount). Hence its ok to return projections from left in child + * schema. + */ + public Set getProjsFromLeftPartOfJoinKeysInJoinSchema() { + return this.projsFromLeftPartOfJoinKeysInChildSchema; + } + + public Set getProjsFromRightPartOfJoinKeysInJoinSchema() { + return this.projsFromRightPartOfJoinKeysInJoinSchema; + } + + public Map> getMapOfProjIndxToLeafPInfo() { + return this.mapOfProjIndxInJoinSchemaToLeafPInfo; + } + + public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoin j) { + return constructJoinPredicateInfo(j, j.getCondition()); + } + + public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoin j, RexNode predicate) { + JoinPredicateInfo jpi = null; + JoinLeafPredicateInfo jlpi = null; + List equiLPIList = new ArrayList(); + List nonEquiLPIList = new ArrayList(); + Set projsFromLeftPartOfJoinKeys = new HashSet(); + Set projsFromRightPartOfJoinKeys = new HashSet(); + Set projsFromRightPartOfJoinKeysInJoinSchema = new HashSet(); + Map> tmpMapOfProjIndxInJoinSchemaToLeafPInfo = new HashMap>(); + Map> mapOfProjIndxInJoinSchemaToLeafPInfo = new HashMap>(); + List tmpJLPILst = null; + int rightOffSet = j.getLeft().getRowType().getFieldCount(); + int projIndxInJoin; + List conjuctiveElements; + + // 1. Decompose Join condition to a number of leaf predicates + // (conjuctive elements) + conjuctiveElements = RelOptUtil.conjunctions(predicate); + + // 2. 
Walk through leaf predicates building up JoinLeafPredicateInfo + for (RexNode ce : conjuctiveElements) { + // 2.1 Construct JoinLeafPredicateInfo + jlpi = JoinLeafPredicateInfo.constructJoinLeafPredicateInfo(j, ce); + + // 2.2 Classify leaf predicate as Equi vs Non Equi + if (jlpi.comparisonType.equals(SqlKind.EQUALS)) { + equiLPIList.add(jlpi); + } else { + nonEquiLPIList.add(jlpi); + } + + // 2.3 Maintain join keys coming from left vs right (in child & + // Join Schema) + projsFromLeftPartOfJoinKeys.addAll(jlpi.getProjsFromLeftPartOfJoinKeysInChildSchema()); + projsFromRightPartOfJoinKeys.addAll(jlpi.getProjsFromRightPartOfJoinKeysInChildSchema()); + projsFromRightPartOfJoinKeysInJoinSchema.addAll(jlpi + .getProjsFromRightPartOfJoinKeysInJoinSchema()); + + // 2.4 Update Join Key to JoinLeafPredicateInfo map with keys + // from left + for (Integer projIndx : jlpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) { + tmpJLPILst = tmpMapOfProjIndxInJoinSchemaToLeafPInfo.get(projIndx); + if (tmpJLPILst == null) + tmpJLPILst = new ArrayList<JoinLeafPredicateInfo>(); + tmpJLPILst.add(jlpi); + tmpMapOfProjIndxInJoinSchemaToLeafPInfo.put(projIndx, tmpJLPILst); + } + + // 2.5 Update Join Key to JoinLeafPredicateInfo map with keys + // from right + for (Integer projIndx : jlpi.getProjsFromRightPartOfJoinKeysInChildSchema()) { + projIndxInJoin = projIndx + rightOffSet; + tmpJLPILst = tmpMapOfProjIndxInJoinSchemaToLeafPInfo.get(projIndxInJoin); + if (tmpJLPILst == null) + tmpJLPILst = new ArrayList<JoinLeafPredicateInfo>(); + tmpJLPILst.add(jlpi); + tmpMapOfProjIndxInJoinSchemaToLeafPInfo.put(projIndxInJoin, tmpJLPILst); + } + + } + + // 3. Update the Join Key to List<JoinLeafPredicateInfo> map to use + // ImmutableList values + for (Entry<Integer, List<JoinLeafPredicateInfo>> e : tmpMapOfProjIndxInJoinSchemaToLeafPInfo + .entrySet()) { + mapOfProjIndxInJoinSchemaToLeafPInfo.put(e.getKey(), ImmutableList.copyOf(e.getValue())); + } + + // 4. Construct JoinPredicateInfo + jpi = new JoinPredicateInfo(nonEquiLPIList, equiLPIList, projsFromLeftPartOfJoinKeys, + projsFromRightPartOfJoinKeys, projsFromRightPartOfJoinKeysInJoinSchema, + mapOfProjIndxInJoinSchemaToLeafPInfo); + return jpi; + } + } + + /** + * JoinLeafPredicateInfo represents a leaf predicate in the Join condition + * (conjunctive element).
+ *

+ * JoinLeafPredicateInfo:
+ * 1. Stores the lists of expressions from the left and right child that are part of + * the equi-join keys.
+ * 2. Stores the set of projection indexes from the left and right child that are + * part of the equi-join keys; the indexes are given in both the child and the Join node schema.
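+ * For example, if the left child has 3 fields, right-child key index 1
+ * appears as 1 in the child schema and as 4 (1 + leftFieldCount) in the
+ * Join schema.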
+ */ + public static class JoinLeafPredicateInfo { + private final SqlKind comparisonType; + private final ImmutableList joinKeyExprsFromLeft; + private final ImmutableList joinKeyExprsFromRight; + private final ImmutableSet projsFromLeftPartOfJoinKeysInChildSchema; + private final ImmutableSet projsFromRightPartOfJoinKeysInChildSchema; + private final ImmutableSet projsFromRightPartOfJoinKeysInJoinSchema; + + public JoinLeafPredicateInfo(SqlKind comparisonType, List joinKeyExprsFromLeft, + List joinKeyExprsFromRight, Set projsFromLeftPartOfJoinKeysInChildSchema, + Set projsFromRightPartOfJoinKeysInChildSchema, + Set projsFromRightPartOfJoinKeysInJoinSchema) { + this.comparisonType = comparisonType; + this.joinKeyExprsFromLeft = ImmutableList.copyOf(joinKeyExprsFromLeft); + this.joinKeyExprsFromRight = ImmutableList.copyOf(joinKeyExprsFromRight); + this.projsFromLeftPartOfJoinKeysInChildSchema = ImmutableSet + .copyOf(projsFromLeftPartOfJoinKeysInChildSchema); + this.projsFromRightPartOfJoinKeysInChildSchema = ImmutableSet + .copyOf(projsFromRightPartOfJoinKeysInChildSchema); + this.projsFromRightPartOfJoinKeysInJoinSchema = ImmutableSet + .copyOf(projsFromRightPartOfJoinKeysInJoinSchema); + } + + public List getJoinKeyExprsFromLeft() { + return this.joinKeyExprsFromLeft; + } + + public List getJoinKeyExprsFromRight() { + return this.joinKeyExprsFromRight; + } + + public Set getProjsFromLeftPartOfJoinKeysInChildSchema() { + return this.projsFromLeftPartOfJoinKeysInChildSchema; + } + + /** + * NOTE: Join Schema = left Schema + (right Schema offset by + * left.fieldcount). Hence its ok to return projections from left in child + * schema. + */ + public Set getProjsFromLeftPartOfJoinKeysInJoinSchema() { + return this.projsFromLeftPartOfJoinKeysInChildSchema; + } + + public Set getProjsFromRightPartOfJoinKeysInChildSchema() { + return this.projsFromRightPartOfJoinKeysInChildSchema; + } + + public Set getProjsFromRightPartOfJoinKeysInJoinSchema() { + return this.projsFromRightPartOfJoinKeysInJoinSchema; + } + + private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(HiveJoin j, RexNode pe) { + JoinLeafPredicateInfo jlpi = null; + List filterNulls = new ArrayList(); + List joinKeyExprsFromLeft = new ArrayList(); + List joinKeyExprsFromRight = new ArrayList(); + Set projsFromLeftPartOfJoinKeysInChildSchema = new HashSet(); + Set projsFromRightPartOfJoinKeysInChildSchema = new HashSet(); + Set projsFromRightPartOfJoinKeysInJoinSchema = new HashSet(); + int rightOffSet = j.getLeft().getRowType().getFieldCount(); + + // 1. Split leaf join predicate to expressions from left, right + RelOptUtil.splitJoinCondition(j.getSystemFieldList(), j.getLeft(), j.getRight(), pe, + joinKeyExprsFromLeft, joinKeyExprsFromRight, filterNulls, null); + + // 2. For left expressions, collect child projection indexes used + InputReferencedVisitor irvLeft = new InputReferencedVisitor(); + irvLeft.apply(joinKeyExprsFromLeft); + projsFromLeftPartOfJoinKeysInChildSchema.addAll(irvLeft.inputPosReferenced); + + // 3. For right expressions, collect child projection indexes used + InputReferencedVisitor irvRight = new InputReferencedVisitor(); + irvRight.apply(joinKeyExprsFromRight); + projsFromRightPartOfJoinKeysInChildSchema.addAll(irvRight.inputPosReferenced); + + // 3. Translate projection indexes from right to join schema, by adding + // offset. + for (Integer indx : projsFromRightPartOfJoinKeysInChildSchema) { + projsFromRightPartOfJoinKeysInJoinSchema.add(indx + rightOffSet); + } + + // 4. 
Construct JoinLeafPredicateInfo + jlpi = new JoinLeafPredicateInfo(pe.getKind(), joinKeyExprsFromLeft, joinKeyExprsFromRight, + projsFromLeftPartOfJoinKeysInChildSchema, projsFromRightPartOfJoinKeysInChildSchema, + projsFromRightPartOfJoinKeysInJoinSchema); + + return jlpi; + } + } + + public static boolean limitRelNode(RelNode rel) { + if ((rel instanceof Sort) && ((Sort) rel).getCollation().getFieldCollations().isEmpty()) + return true; + + return false; + } + + public static boolean orderRelNode(RelNode rel) { + if ((rel instanceof Sort) && !((Sort) rel).getCollation().getFieldCollations().isEmpty()) + return true; + + return false; + } + + /** + * Get top level select starting from root. Assumption here is root can only + * be Sort & Project. Also the top project should be at most 2 levels + * below Sort; i.e Sort(Limit)-Sort(OB)-Select + * + * @param rootRel + * @return + */ + public static Pair getTopLevelSelect(final RelNode rootRel) { + RelNode tmpRel = rootRel; + RelNode parentOforiginalProjRel = rootRel; + HiveProject originalProjRel = null; + + while (tmpRel != null) { + if (tmpRel instanceof HiveProject) { + originalProjRel = (HiveProject) tmpRel; + break; + } + parentOforiginalProjRel = tmpRel; + tmpRel = tmpRel.getInput(0); + } + + return (new Pair(parentOforiginalProjRel, originalProjRel)); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java new file mode 100644 index 0000000..837399b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdUniqueKeys; + +import com.google.common.collect.ImmutableList; + +public class HiveDefaultRelMetadataProvider { + private HiveDefaultRelMetadataProvider() { + } + + public static final RelMetadataProvider INSTANCE = ChainedRelMetadataProvider.of(ImmutableList + .of(HiveRelMdDistinctRowCount.SOURCE, + HiveRelMdSelectivity.SOURCE, + HiveRelMdRowCount.SOURCE, + HiveRelMdUniqueKeys.SOURCE, + new DefaultRelMetadataProvider())); +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java new file mode 100644 index 0000000..10fdcc6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.rel.type.RelDataTypeSystemImpl; +import org.apache.calcite.sql.type.SqlTypeName; + +public class HiveTypeSystemImpl extends RelDataTypeSystemImpl { + // TODO: This should come from type system; Currently there is no definition + // in type system for this. + private static final int MAX_DECIMAL_PRECISION = 38; + private static final int MAX_DECIMAL_SCALE = 38; + private static final int DEFAULT_DECIMAL_PRECISION = 10; + private static final int MAX_VARCHAR_PRECISION = 65535; + private static final int MAX_CHAR_PRECISION = 255; + private static final int MAX_BINARY_PRECISION = Integer.MAX_VALUE; + private static final int MAX_TIMESTAMP_PRECISION = 9; + + @Override + public int getMaxScale(SqlTypeName typeName) { + switch (typeName) { + case DECIMAL: + return getMaxNumericScale(); + case INTERVAL_DAY_TIME: + case INTERVAL_YEAR_MONTH: + return SqlTypeName.MAX_INTERVAL_FRACTIONAL_SECOND_PRECISION; + default: + return -1; + } + } + + @Override + public int getDefaultPrecision(SqlTypeName typeName) { + switch (typeName) { + // Hive will always require user to specify exact sizes for char, varchar; + // Binary doesn't need any sizes; Decimal has the default of 10. 
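+ // e.g. an unqualified DECIMAL maps to Hive's default decimal(10,0),
+ // while CHAR falls through to getMaxPrecision, i.e. 255.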
+ case CHAR: + case VARCHAR: + case BINARY: + case VARBINARY: + case TIME: + case TIMESTAMP: + return getMaxPrecision(typeName); + case DECIMAL: + return DEFAULT_DECIMAL_PRECISION; + case INTERVAL_DAY_TIME: + case INTERVAL_YEAR_MONTH: + return SqlTypeName.DEFAULT_INTERVAL_START_PRECISION; + default: + return -1; + } + } + + @Override + public int getMaxPrecision(SqlTypeName typeName) { + switch (typeName) { + case DECIMAL: + return getMaxNumericPrecision(); + case VARCHAR: + return MAX_VARCHAR_PRECISION; + case CHAR: + return MAX_CHAR_PRECISION; + case VARBINARY: + case BINARY: + return MAX_BINARY_PRECISION; + case TIME: + case TIMESTAMP: + return MAX_TIMESTAMP_PRECISION; + case INTERVAL_DAY_TIME: + case INTERVAL_YEAR_MONTH: + return SqlTypeName.MAX_INTERVAL_START_PRECISION; + default: + return -1; + } + } + + @Override + public int getMaxNumericScale() { + return MAX_DECIMAL_SCALE; + } + + @Override + public int getMaxNumericPrecision() { + return MAX_DECIMAL_PRECISION; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java new file mode 100644 index 0000000..6d57a8d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -0,0 +1,355 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.calcite.plan.RelOptAbstractTable; +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.plan.RelOptUtil.InputFinder; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.logical.LogicalTableScan; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter; +import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.stats.StatsUtils; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableMap.Builder; + +public class RelOptHiveTable extends RelOptAbstractTable { + private final Table hiveTblMetadata; + private final String tblAlias; + private final ImmutableList hiveNonPartitionCols; + private final ImmutableMap hiveNonPartitionColsMap; + private final ImmutableMap hivePartitionColsMap; + private final int noOfProjs; + final HiveConf hiveConf; + + private double rowCount = -1; + Map hiveColStatsMap = new HashMap(); + PrunedPartitionList partitionList; + Map partitionCache; + AtomicInteger noColsMissingStats; + + protected static final Log LOG = LogFactory + .getLog(RelOptHiveTable.class + .getName()); + + public RelOptHiveTable(RelOptSchema calciteSchema, String qualifiedTblName, String tblAlias, RelDataType rowType, + Table hiveTblMetadata, List hiveNonPartitionCols, + List hivePartitionCols, HiveConf hconf, Map partitionCache, AtomicInteger noColsMissingStats) { + super(calciteSchema, qualifiedTblName, rowType); + this.hiveTblMetadata = hiveTblMetadata; + this.tblAlias = tblAlias; + this.hiveNonPartitionCols = ImmutableList.copyOf(hiveNonPartitionCols); + this.hiveNonPartitionColsMap = getColInfoMap(hiveNonPartitionCols, 0); + this.hivePartitionColsMap = getColInfoMap(hivePartitionCols, hiveNonPartitionColsMap.size()); + this.noOfProjs = hiveNonPartitionCols.size() + hivePartitionCols.size(); + this.hiveConf = hconf; + this.partitionCache = partitionCache; + this.noColsMissingStats = noColsMissingStats; + } + + private static ImmutableMap getColInfoMap(List hiveCols, + int startIndx) { + Builder bldr = ImmutableMap. 
builder(); + + int indx = startIndx; + for (ColumnInfo ci : hiveCols) { + bldr.put(indx, ci); + indx++; + } + + return bldr.build(); + } + + @Override + public boolean isKey(ImmutableBitSet arg0) { + return false; + } + + @Override + public RelNode toRel(ToRelContext context) { + return new LogicalTableScan(context.getCluster(), this); + } + + @Override + public T unwrap(Class arg0) { + return arg0.isInstance(this) ? arg0.cast(this) : null; + } + + @Override + public double getRowCount() { + if (rowCount == -1) { + if (null == partitionList) { + // we are here either unpartitioned table or partitioned table with no predicates + computePartitionList(hiveConf, null); + } + if (hiveTblMetadata.isPartitioned()) { + List rowCounts = StatsUtils.getBasicStatForPartitions( + hiveTblMetadata, partitionList.getNotDeniedPartns(), + StatsSetupConst.ROW_COUNT); + rowCount = StatsUtils.getSumIgnoreNegatives(rowCounts); + + } else { + rowCount = StatsUtils.getNumRows(hiveTblMetadata); + } + } + + if (rowCount == -1) + noColsMissingStats.getAndIncrement(); + + return rowCount; + } + + public Table getHiveTableMD() { + return hiveTblMetadata; + } + + public String getTableAlias() { + // NOTE: Calcite considers tbls to be equal if their names are the same. Hence + // we need to provide Calcite the fully qualified table name (dbname.tblname) + // and not the user provided aliases. + // However in HIVE DB name can not appear in select list; in case of join + // where table names differ only in DB name, Hive would require user + // introducing explicit aliases for tbl. + if (tblAlias == null) + return hiveTblMetadata.getTableName(); + else + return tblAlias; + } + + private String getColNamesForLogging(Set colLst) { + StringBuffer sb = new StringBuffer(); + boolean firstEntry = true; + for (String colName : colLst) { + if (firstEntry) { + sb.append(colName); + firstEntry = false; + } else { + sb.append(", " + colName); + } + } + return sb.toString(); + } + + public void computePartitionList(HiveConf conf, RexNode pruneNode) { + + try { + if (!hiveTblMetadata.isPartitioned() || pruneNode == null || InputFinder.bits(pruneNode).length() == 0 ) { + // there is no predicate on partitioning column, we need all partitions in this case. + partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf, getName(), partitionCache); + return; + } + + // We have valid pruning expressions, only retrieve qualifying partitions + ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true)); + + partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(), partitionCache); + } catch (HiveException he) { + throw new RuntimeException(he); + } + } + + private void updateColStats(Set projIndxLst) { + List nonPartColNamesThatRqrStats = new ArrayList(); + List nonPartColIndxsThatRqrStats = new ArrayList(); + List partColNamesThatRqrStats = new ArrayList(); + List partColIndxsThatRqrStats = new ArrayList(); + Set colNamesFailedStats = new HashSet(); + + // 1. 
Separate required columns to Non Partition and Partition Cols + ColumnInfo tmp; + for (Integer pi : projIndxLst) { + if (hiveColStatsMap.get(pi) == null) { + if ((tmp = hiveNonPartitionColsMap.get(pi)) != null) { + nonPartColNamesThatRqrStats.add(tmp.getInternalName()); + nonPartColIndxsThatRqrStats.add(pi); + } else if ((tmp = hivePartitionColsMap.get(pi)) != null) { + partColNamesThatRqrStats.add(tmp.getInternalName()); + partColIndxsThatRqrStats.add(pi); + } else { + noColsMissingStats.getAndIncrement(); + String logMsg = "Unable to find Column Index: " + pi + ", in " + + hiveTblMetadata.getCompleteName(); + LOG.error(logMsg); + throw new RuntimeException(logMsg); + } + } + } + + if (null == partitionList) { + // We could be here either because its an unpartitioned table or because + // there are no pruning predicates on a partitioned table. + computePartitionList(hiveConf, null); + } + + // 2. Obtain Col Stats for Non Partition Cols + if (nonPartColNamesThatRqrStats.size() > 0) { + List hiveColStats; + + if (!hiveTblMetadata.isPartitioned()) { + // 2.1 Handle the case for unpartitioned table. + hiveColStats = StatsUtils.getTableColumnStats(hiveTblMetadata, hiveNonPartitionCols, + nonPartColNamesThatRqrStats); + + // 2.1.1 Record Column Names that we needed stats for but couldn't + if (hiveColStats == null) { + colNamesFailedStats.addAll(nonPartColNamesThatRqrStats); + } else if (hiveColStats.size() != nonPartColNamesThatRqrStats.size()) { + Set setOfFiledCols = new HashSet(nonPartColNamesThatRqrStats); + + Set setOfObtainedColStats = new HashSet(); + for (ColStatistics cs : hiveColStats) { + setOfObtainedColStats.add(cs.getColumnName()); + } + setOfFiledCols.removeAll(setOfObtainedColStats); + + colNamesFailedStats.addAll(setOfFiledCols); + } + } else { + // 2.2 Obtain col stats for partitioned table. + try { + if (partitionList.getNotDeniedPartns().isEmpty()) { + // no need to make a metastore call + rowCount = 0; + hiveColStats = new ArrayList(); + for (String c : nonPartColNamesThatRqrStats) { + // add empty stats object for each column + hiveColStats.add(new ColStatistics(hiveTblMetadata.getTableName(), c, null)); + } + colNamesFailedStats.clear(); + } else { + Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList, + hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, + nonPartColNamesThatRqrStats, true, true); + rowCount = stats.getNumRows(); + hiveColStats = new ArrayList(); + for (String c : nonPartColNamesThatRqrStats) { + ColStatistics cs = stats.getColumnStatisticsFromColName(c); + if (cs != null) { + hiveColStats.add(cs); + } else { + colNamesFailedStats.add(c); + } + } + } + } catch (HiveException e) { + String logMsg = "Collecting stats failed."; + LOG.error(logMsg); + throw new RuntimeException(logMsg); + } + } + + if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) { + for (int i = 0; i < hiveColStats.size(); i++) { + hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), hiveColStats.get(i)); + } + } + } + + // 3. 
Obtain Stats for Partition Cols + if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) { + ColStatistics cStats = null; + for (int i = 0; i < partColNamesThatRqrStats.size(); i++) { + cStats = new ColStatistics(hiveTblMetadata.getTableName(), + partColNamesThatRqrStats.get(i), hivePartitionColsMap.get( + partColIndxsThatRqrStats.get(i)).getTypeName()); + cStats.setCountDistint(getDistinctCount(partitionList.getPartitions(),partColNamesThatRqrStats.get(i))); + hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats); + } + } + + // 4. Warn user if we could get stats for required columns + if (!colNamesFailedStats.isEmpty()) { + String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", Columns: " + + getColNamesForLogging(colNamesFailedStats); + LOG.error(logMsg); + noColsMissingStats.getAndAdd(colNamesFailedStats.size()); + throw new RuntimeException(logMsg); + } + } + + private int getDistinctCount(Set partitions, String partColName) { + Set distinctVals = new HashSet(partitions.size()); + for (Partition partition : partitions) { + distinctVals.add(partition.getSpec().get(partColName)); + } + return distinctVals.size(); + } + + public List getColStat(List projIndxLst) { + ImmutableList.Builder colStatsBldr = ImmutableList. builder(); + + if (projIndxLst != null) { + updateColStats(new HashSet(projIndxLst)); + for (Integer i : projIndxLst) { + colStatsBldr.add(hiveColStatsMap.get(i)); + } + } else { + List pILst = new ArrayList(); + for (Integer i = 0; i < noOfProjs; i++) { + pILst.add(i); + } + updateColStats(new HashSet(pILst)); + for (Integer pi : pILst) { + colStatsBldr.add(hiveColStatsMap.get(pi)); + } + } + + return colStatsBldr.build(); + } + + /* + * use to check if a set of columns are all partition columns. + * true only if: + * - all columns in BitSet are partition + * columns. + */ + public boolean containsPartitionColumnsOnly(ImmutableBitSet cols) { + + for (int i = cols.nextSetBit(0); i >= 0; i++, i = cols.nextSetBit(i + 1)) { + if (!hivePartitionColsMap.containsKey(i)) { + return false; + } + } + return true; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/TraitsUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/TraitsUtil.java new file mode 100644 index 0000000..f182846 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/TraitsUtil.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.calcite; + + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; + +public class TraitsUtil { + public static RelTraitSet getSortTraitSet(RelOptCluster cluster, RelTraitSet traitSet, + RelCollation collation) { + return traitSet.plus(collation); + } + + public static RelTraitSet getDefaultTraitSet(RelOptCluster cluster) { + return cluster.traitSetOf(HiveRelNode.CONVENTION, RelCollationImpl.EMPTY); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java new file mode 100644 index 0000000..71b6680 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java @@ -0,0 +1,212 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptCostFactory; +import org.apache.calcite.plan.RelOptUtil; + +// TODO: This should inherit from VolcanoCost and should just override isLE method. 
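+// A minimal sketch of that TODO, assuming Calcite's VolcanoCost (currently
+// package-private) were opened up for subclassing; only the ordering test
+// would need to be overridden:
+//
+//   public class HiveCost extends VolcanoCost {
+//     @Override
+//     public boolean isLe(RelOptCost other) {
+//       return this == other || this.getRows() <= other.getRows();
+//     }
+//   }
+//
+// The remaining cost arithmetic (plus, minus, multiplyBy) would then be
+// inherited instead of duplicated below.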
+public class HiveCost implements RelOptCost { + // ~ Static fields/initializers --------------------------------------------- + + public static final HiveCost INFINITY = new HiveCost(Double.POSITIVE_INFINITY, + Double.POSITIVE_INFINITY, + Double.POSITIVE_INFINITY) { + @Override + public String toString() { + return "{inf}"; + } + }; + + public static final HiveCost HUGE = new HiveCost(Double.MAX_VALUE, Double.MAX_VALUE, + Double.MAX_VALUE) { + @Override + public String toString() { + return "{huge}"; + } + }; + + public static final HiveCost ZERO = new HiveCost(0.0, 0.0, 0.0) { + @Override + public String toString() { + return "{0}"; + } + }; + + public static final HiveCost TINY = new HiveCost(1.0, 1.0, 0.0) { + @Override + public String toString() { + return "{tiny}"; + } + }; + + public static final RelOptCostFactory FACTORY = new Factory(); + + // ~ Instance fields -------------------------------------------------------- + + final double cpu; + final double io; + final double rowCount; + + // ~ Constructors ----------------------------------------------------------- + + HiveCost(double rowCount, double cpu, double io) { + assert rowCount >= 0d; + assert cpu >= 0d; + assert io >= 0d; + this.rowCount = rowCount; + this.cpu = cpu; + this.io = io; + } + + // ~ Methods ---------------------------------------------------------------- + + public double getCpu() { + return cpu; + } + + public boolean isInfinite() { + return (this == INFINITY) || (this.rowCount == Double.POSITIVE_INFINITY) + || (this.cpu == Double.POSITIVE_INFINITY) || (this.io == Double.POSITIVE_INFINITY); + } + + public double getIo() { + return io; + } + + // TODO: If two cost is equal, could we do any better than comparing + // cardinality (may be some other heuristics to break the tie) + public boolean isLe(RelOptCost other) { + return this == other || this.rowCount <= other.getRows(); + /* + * if (((this.dCpu + this.dIo) < (other.getCpu() + other.getIo())) || + * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()) && this.dRows + * <= other.getRows())) { return true; } else { return false; } + */ + } + + public boolean isLt(RelOptCost other) { + return this.rowCount < other.getRows(); + /* + * return isLe(other) && !equals(other); + */ + } + + public double getRows() { + return rowCount; + } + + public boolean equals(RelOptCost other) { + return (this == other) || ((this.rowCount) == (other.getRows())); + + /* + * //TODO: should we consider cardinality as well? return (this == other) || + * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo())); + */ + } + + public boolean isEqWithEpsilon(RelOptCost other) { + return (this == other) || (Math.abs((this.rowCount) - (other.getRows())) < RelOptUtil.EPSILON); + // Turn this one once we do the Algorithm selection in CBO + /* + * return (this == other) || (Math.abs((this.dCpu + this.dIo) - + * (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON); + */ + } + + public RelOptCost minus(RelOptCost other) { + if (this == INFINITY) { + return this; + } + + return new HiveCost(this.rowCount - other.getRows(), this.cpu - other.getCpu(), this.io + - other.getIo()); + } + + public RelOptCost multiplyBy(double factor) { + if (this == INFINITY) { + return this; + } + return new HiveCost(rowCount * factor, cpu * factor, io * factor); + } + + public double divideBy(RelOptCost cost) { + // Compute the geometric average of the ratios of all of the factors + // which are non-zero and finite. 
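+ // e.g. with row ratio 2 and cpu ratio 8 (io excluded as zero), the
+ // result is pow(2 * 8, 1/2) = 4, the geometric mean of the two ratios.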
+ double d = 1; + double n = 0; + if ((this.rowCount != 0) && !Double.isInfinite(this.rowCount) && (cost.getRows() != 0) + && !Double.isInfinite(cost.getRows())) { + d *= this.rowCount / cost.getRows(); + ++n; + } + if ((this.cpu != 0) && !Double.isInfinite(this.cpu) && (cost.getCpu() != 0) + && !Double.isInfinite(cost.getCpu())) { + d *= this.cpu / cost.getCpu(); + ++n; + } + if ((this.io != 0) && !Double.isInfinite(this.io) && (cost.getIo() != 0) + && !Double.isInfinite(cost.getIo())) { + d *= this.io / cost.getIo(); + ++n; + } + if (n == 0) { + return 1.0; + } + return Math.pow(d, 1 / n); + } + + public RelOptCost plus(RelOptCost other) { + if ((this == INFINITY) || (other.isInfinite())) { + return INFINITY; + } + return new HiveCost(this.rowCount + other.getRows(), this.cpu + other.getCpu(), this.io + + other.getIo()); + } + + @Override + public String toString() { + return "{" + rowCount + " rows, " + cpu + " cpu, " + io + " io}"; + } + + private static class Factory implements RelOptCostFactory { + private Factory() { + } + + public RelOptCost makeCost(double rowCount, double cpu, double io) { + return new HiveCost(rowCount, cpu, io); + } + + public RelOptCost makeHugeCost() { + return HUGE; + } + + public HiveCost makeInfiniteCost() { + return INFINITY; + } + + public HiveCost makeTinyCost() { + return TINY; + } + + public HiveCost makeZeroCost() { + return ZERO; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java new file mode 100644 index 0000000..c7e9217 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; + +// Use this once we have Join Algorithm selection +public class HiveCostUtil { + private static final double cpuCostInNanoSec = 1.0; + private static final double netCostInNanoSec = 150 * cpuCostInNanoSec; + private static final double localFSWriteCostInNanoSec = 4 * netCostInNanoSec; + private static final double localFSReadCostInNanoSec = 4 * netCostInNanoSec; + private static final double hDFSWriteCostInNanoSec = 10 * localFSWriteCostInNanoSec; + @SuppressWarnings("unused") +//Use this once we have Join Algorithm selection + private static final double hDFSReadCostInNanoSec = 1.5 * localFSReadCostInNanoSec; + + public static RelOptCost computCardinalityBasedCost(HiveRelNode hr) { + return new HiveCost(hr.getRows(), 0, 0); + } + + public static HiveCost computeCost(HiveTableScan t) { + double cardinality = t.getRows(); + return new HiveCost(cardinality, 0, hDFSWriteCostInNanoSec * cardinality * 0); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java new file mode 100644 index 0000000..ebcd4f3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import org.apache.calcite.plan.ConventionTraitDef; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.volcano.VolcanoPlanner; +import org.apache.calcite.rel.RelCollationTraitDef; + +/** + * Refinement of {@link org.apache.calcite.plan.volcano.VolcanoPlanner} for Hive. + * + *

+ * It uses {@link org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost} as + * its cost model. + */ +public class HiveVolcanoPlanner extends VolcanoPlanner { + private static final boolean ENABLE_COLLATION_TRAIT = true; + + /** Creates a HiveVolcanoPlanner. */ + public HiveVolcanoPlanner() { + super(HiveCost.FACTORY, null); + } + + public static RelOptPlanner createPlanner() { + final VolcanoPlanner planner = new HiveVolcanoPlanner(); + planner.addRelTraitDef(ConventionTraitDef.INSTANCE); + if (ENABLE_COLLATION_TRAIT) { + planner.addRelTraitDef(RelCollationTraitDef.INSTANCE); + } + return planner; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java new file mode 100644 index 0000000..21ddc99 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import java.util.List; + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.InvalidRelException; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.RelFactories.AggregateFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; + +import com.google.common.collect.ImmutableList; + +public class HiveAggregate extends Aggregate implements HiveRelNode { + + public static final HiveAggRelFactory HIVE_AGGR_REL_FACTORY = new HiveAggRelFactory(); + + public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, + boolean indicator, ImmutableBitSet groupSet, List groupSets, + List aggCalls) throws InvalidRelException { + super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, indicator, groupSet, + groupSets, aggCalls); + } + + @Override + public Aggregate copy(RelTraitSet traitSet, RelNode input, + boolean indicator, ImmutableBitSet groupSet, + List groupSets, List aggCalls) { + try { + return new HiveAggregate(getCluster(), traitSet, input, indicator, groupSet, + groupSets, aggCalls); + } catch (InvalidRelException e) { + // Semantic error not possible. Must be a bug. Convert to + // internal error. 
+ throw new AssertionError(e); + } + } + + @Override + public void implement(Implementor implementor) { + } + + @Override + public RelOptCost computeSelfCost(RelOptPlanner planner) { + return HiveCost.FACTORY.makeZeroCost(); + } + + @Override + public double getRows() { + return RelMetadataQuery.getDistinctRowCount(this, groupSet, getCluster().getRexBuilder() + .makeLiteral(true)); + } + + private static class HiveAggRelFactory implements AggregateFactory { + + @Override + public RelNode createAggregate(RelNode child, boolean indicator, + ImmutableBitSet groupSet, ImmutableList groupSets, + List aggCalls) { + try { + return new HiveAggregate(child.getCluster(), child.getTraitSet(), child, indicator, + groupSet, groupSets, aggCalls); + } catch (InvalidRelException e) { + throw new RuntimeException(e); + } + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java new file mode 100644 index 0000000..3e45a3f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; + +public class HiveFilter extends Filter implements HiveRelNode { + + public static final FilterFactory DEFAULT_FILTER_FACTORY = new HiveFilterFactoryImpl(); + + public HiveFilter(RelOptCluster cluster, RelTraitSet traits, RelNode child, RexNode condition) { + super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition); + } + + @Override + public Filter copy(RelTraitSet traitSet, RelNode input, RexNode condition) { + assert traitSet.containsIfApplicable(HiveRelNode.CONVENTION); + return new HiveFilter(getCluster(), traitSet, input, getCondition()); + } + + @Override + public void implement(Implementor implementor) { + } + + @Override + public RelOptCost computeSelfCost(RelOptPlanner planner) { + return HiveCost.FACTORY.makeZeroCost(); + } + + /** + * Implementation of {@link FilterFactory} that returns + * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter} + * . 
*/
+  private static class HiveFilterFactoryImpl implements FilterFactory {
+    @Override
+    public RelNode createFilter(RelNode child, RexNode condition) {
+      RelOptCluster cluster = child.getCluster();
+      HiveFilter filter = new HiveFilter(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition);
+      return filter;
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
new file mode 100644
index 0000000..724135b
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.InvalidRelException;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories.JoinFactory;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+
+//TODO: Should we convert MultiJoin to be a child of HiveJoin?
+public class HiveJoin extends Join implements HiveRelNode {
+  // NOTE: COMMON_JOIN & SMB_JOIN are sort-merge joins (in case of COMMON_JOIN
+  // each parallel computation handles multiple splits, whereas in case of
+  // SMB_JOIN each parallel computation handles one bucket). MAP_JOIN and
+  // BUCKET_JOIN are hash joins, where MAP_JOIN keeps the whole data set of the
+  // non-streaming tables in memory, whereas BUCKET_JOIN keeps only the bucket
+  // of the non-streaming tables needed by the current task in memory.
+  public enum JoinAlgorithm {
+    NONE, COMMON_JOIN, MAP_JOIN, BUCKET_JOIN, SMB_JOIN
+  }
+
+  public enum MapJoinStreamingRelation {
+    NONE, LEFT_RELATION, RIGHT_RELATION
+  }
+
+  public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl();
+
+  private final boolean leftSemiJoin;
+  private final JoinAlgorithm joinAlgorithm;
+  // This will be used once we do Join Algorithm selection
+  @SuppressWarnings("unused")
+  private final MapJoinStreamingRelation mapJoinStreamingSide = MapJoinStreamingRelation.NONE;
+
+  public static HiveJoin getJoin(RelOptCluster cluster, RelNode left, RelNode right,
+      RexNode condition, JoinRelType joinType, boolean leftSemiJoin) {
+    try {
+      Set<String> variablesStopped = Collections.emptySet();
+      return new HiveJoin(cluster, null, left, right, condition, joinType, variablesStopped,
+          JoinAlgorithm.NONE, null, leftSemiJoin);
+    } catch (InvalidRelException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  protected HiveJoin(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right,
+      RexNode condition, JoinRelType joinType, Set<String> variablesStopped,
+      JoinAlgorithm joinAlgo, MapJoinStreamingRelation streamingSideForMapJoin, boolean leftSemiJoin)
+      throws InvalidRelException {
+    super(cluster, TraitsUtil.getDefaultTraitSet(cluster), left, right, condition, joinType,
+        variablesStopped);
+    this.joinAlgorithm = joinAlgo;
+    this.leftSemiJoin = leftSemiJoin;
+  }
+
+  @Override
+  public void implement(Implementor implementor) {
+  }
+
+  @Override
+  public final HiveJoin copy(RelTraitSet traitSet, RexNode conditionExpr, RelNode left,
+      RelNode right, JoinRelType joinType, boolean semiJoinDone) {
+    try {
+      Set<String> variablesStopped = Collections.emptySet();
+      return new HiveJoin(getCluster(), traitSet, left, right, conditionExpr, joinType,
+          variablesStopped, JoinAlgorithm.NONE, null, leftSemiJoin);
+    } catch (InvalidRelException e) {
+      // Semantic error not possible. Must be a bug. Convert to
+      // internal error.
+      throw new AssertionError(e);
+    }
+  }
+
+  public JoinAlgorithm getJoinAlgorithm() {
+    return joinAlgorithm;
+  }
+
+  public boolean isLeftSemiJoin() {
+    return leftSemiJoin;
+  }
+
+  /**
+   * Model the cost of a join as the size of its inputs.
+   */
+  @Override
+  public RelOptCost computeSelfCost(RelOptPlanner planner) {
+    double leftRCount = RelMetadataQuery.getRowCount(getLeft());
+    double rightRCount = RelMetadataQuery.getRowCount(getRight());
+    return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0);
+  }
+
+  /**
+   * @return row type representing only the left join input
+   */
+  @Override
+  public RelDataType deriveRowType() {
+    if (leftSemiJoin) {
+      return deriveJoinRowType(left.getRowType(), null, JoinRelType.INNER,
+          getCluster().getTypeFactory(), null,
+          Collections.<RelDataTypeField> emptyList());
+    }
+    return super.deriveRowType();
+  }
+
+  private static class HiveJoinFactoryImpl implements JoinFactory {
+    /**
+     * Creates a join.
+ * + * @param left + * Left input + * @param right + * Right input + * @param condition + * Join condition + * @param joinType + * Join type + * @param variablesStopped + * Set of names of variables which are set by the LHS and used by + * the RHS and are not available to nodes above this JoinRel in the + * tree + * @param semiJoinDone + * Whether this join has been translated to a semi-join + */ + @Override + public RelNode createJoin(RelNode left, RelNode right, RexNode condition, JoinRelType joinType, + Set variablesStopped, boolean semiJoinDone) { + return getJoin(left.getCluster(), left, right, condition, joinType, false); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java new file mode 100644 index 0000000..5fc64f3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import java.util.List; + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; + +public class HiveLimit extends SingleRel implements HiveRelNode { + private final RexNode offset; + private final RexNode fetch; + + HiveLimit(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, RexNode offset, + RexNode fetch) { + super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child); + this.offset = offset; + this.fetch = fetch; + assert getConvention() instanceof HiveRelNode; + assert getConvention() == child.getConvention(); + } + + @Override + public HiveLimit copy(RelTraitSet traitSet, List newInputs) { + return new HiveLimit(getCluster(), traitSet, sole(newInputs), offset, fetch); + } + + public void implement(Implementor implementor) { + } + + @Override + public RelOptCost computeSelfCost(RelOptPlanner planner) { + return HiveCost.FACTORY.makeZeroCost(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java new file mode 100644 index 0000000..6c215c9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.RelFactories.ProjectFactory; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.util.Util; +import org.apache.calcite.util.mapping.Mapping; +import org.apache.calcite.util.mapping.MappingType; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; + +import com.google.common.collect.ImmutableList; + +public class HiveProject extends Project implements HiveRelNode { + + public static final ProjectFactory DEFAULT_PROJECT_FACTORY = new HiveProjectFactoryImpl(); + + private final List virtualCols; + + /** + * Creates a HiveProject. + * + * @param cluster + * Cluster this relational expression belongs to + * @param child + * input relational expression + * @param exps + * List of expressions for the input columns + * @param rowType + * output row type + * @param flags + * values as in {@link Project.Flags} + */ + public HiveProject(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, + List exps, RelDataType rowType, int flags) { + super(cluster, traitSet, child, exps, rowType, flags); + virtualCols = ImmutableList.copyOf(HiveCalciteUtil.getVirtualCols(exps)); + } + + /** + * Creates a HiveProject with no sort keys. + * + * @param child + * input relational expression + * @param exps + * set of expressions for the input columns + * @param fieldNames + * aliases of the expressions + */ + public static HiveProject create(RelNode child, List exps, + List fieldNames) throws CalciteSemanticException{ + RelOptCluster cluster = child.getCluster(); + + // 1 Ensure columnNames are unique - CALCITE-411 + if (fieldNames != null && !Util.isDistinct(fieldNames)) { + String msg = "Select list contains multiple expressions with the same name." + fieldNames; + throw new CalciteSemanticException(msg); + } + RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), exps, fieldNames); + return create(cluster, child, exps, rowType, Collections. emptyList()); + } + + /** + * Creates a HiveProject. + */ + public static HiveProject create(RelOptCluster cluster, RelNode child, List exps, + RelDataType rowType, final List collationList) { + RelTraitSet traitSet = TraitsUtil.getDefaultTraitSet(cluster); + return new HiveProject(cluster, traitSet, child, exps, rowType, Flags.BOXED); + } + + /** + * Creates a HiveProject. 
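+   * (The collationList argument is currently unused; the project is created
+   * with the given trait set and the BOXED flag.)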
+ */ + public static HiveProject create(RelOptCluster cluster, RelNode child, List exps, + RelDataType rowType, RelTraitSet traitSet, final List collationList) { + return new HiveProject(cluster, traitSet, child, exps, rowType, Flags.BOXED); + } + + /** + * Creates a relational expression which projects the output fields of a + * relational expression according to a partial mapping. + * + *
+   * <p>
+ * A partial mapping is weaker than a permutation: every target has one + * source, but a source may have 0, 1 or more than one targets. Usually the + * result will have fewer fields than the source, unless some source fields + * are projected multiple times. + * + *
+   * <p>
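+   * For example, a mapping that sends source field 0 to targets 0 and 1 and
+   * source field 2 to target 2 projects field 0 twice and drops field 1.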
+ * This method could optimize the result as {@link #permute} does, but does + * not at present. + * + * @param rel + * Relational expression + * @param mapping + * Mapping from source fields to target fields. The mapping type must + * obey the constraints {@link MappingType#isMandatorySource()} and + * {@link MappingType#isSingleSource()}, as does + * {@link MappingType#INVERSE_FUNCTION}. + * @param fieldNames + * Field names; if null, or if a particular entry is null, the name + * of the permuted field is used + * @return relational expression which projects a subset of the input fields + * @throws CalciteSemanticException + */ + public static RelNode projectMapping(RelNode rel, Mapping mapping, List fieldNames) throws CalciteSemanticException { + assert mapping.getMappingType().isSingleSource(); + assert mapping.getMappingType().isMandatorySource(); + + if (mapping.isIdentity()) { + return rel; + } + + final List outputNameList = new ArrayList(); + final List outputProjList = new ArrayList(); + final List fields = rel.getRowType().getFieldList(); + final RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); + + for (int i = 0; i < mapping.getTargetCount(); i++) { + int source = mapping.getSource(i); + final RelDataTypeField sourceField = fields.get(source); + outputNameList + .add(((fieldNames == null) || (fieldNames.size() <= i) || (fieldNames.get(i) == null)) ? sourceField + .getName() : fieldNames.get(i)); + outputProjList.add(rexBuilder.makeInputRef(rel, source)); + } + + return create(rel, outputProjList, outputNameList); + } + + @Override + public Project copy(RelTraitSet traitSet, RelNode input, List exps, + RelDataType rowType) { + assert traitSet.containsIfApplicable(HiveRelNode.CONVENTION); + return new HiveProject(getCluster(), traitSet, input, exps, rowType, getFlags()); + } + + @Override + public RelOptCost computeSelfCost(RelOptPlanner planner) { + return HiveCost.FACTORY.makeZeroCost(); + } + + @Override + public void implement(Implementor implementor) { + } + + public List getVirtualCols() { + return virtualCols; + } + + /** + * Implementation of {@link ProjectFactory} that returns + * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject} + * . + */ + private static class HiveProjectFactoryImpl implements ProjectFactory { + + @Override + public RelNode createProject(RelNode child, + List childExprs, List fieldNames) { + RelOptCluster cluster = child.getCluster(); + RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), childExprs, fieldNames); + RelNode project = HiveProject.create(cluster, child, + childExprs, rowType, + child.getTraitSet(), Collections. emptyList()); + + return project; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java new file mode 100644 index 0000000..30acfe2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import org.apache.calcite.plan.Convention; +import org.apache.calcite.rel.RelNode; + +public interface HiveRelNode extends RelNode { + void implement(Implementor implementor); + + /** Calling convention for relational operations that occur in Hive. */ + final Convention CONVENTION = new Convention.Impl("HIVE", HiveRelNode.class); + + class Implementor { + + public void visitChild(int ordinal, RelNode input) { + assert ordinal == 0; + ((HiveRelNode) input).implement(this); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java new file mode 100644 index 0000000..18d2838 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import java.util.Map; + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; + +import com.google.common.collect.ImmutableMap; + +public class HiveSort extends Sort implements HiveRelNode { + + public static final HiveSortRelFactory HIVE_SORT_REL_FACTORY = new HiveSortRelFactory(); + + // NOTE: this is to work around Hive Calcite Limitations w.r.t OB. + // 1. Calcite can not accept expressions in OB; instead it needs to be expressed + // as VC in input Select. + // 2. Hive can not preserve ordering through select boundaries. + // 3. This map is used for outermost OB to migrate the VC corresponding OB + // expressions from input select. + // 4. 
This is used by ASTConverter after we are done with Calcite Planning + private ImmutableMap mapOfInputRefToRexCall; + + public HiveSort(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, + RelCollation collation, RexNode offset, RexNode fetch) { + super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation, + offset, fetch); + } + + @Override + public HiveSort copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation, + RexNode offset, RexNode fetch) { + // TODO: can we blindly copy sort trait? What if inputs changed and we + // are now sorting by different cols + RelCollation canonizedCollation = traitSet.canonize(newCollation); + return new HiveSort(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch); + } + + public RexNode getFetchExpr() { + return fetch; + } + + public void setInputRefToCallMap(ImmutableMap refToCall) { + this.mapOfInputRefToRexCall = refToCall; + } + + public Map getInputRefToCallMap() { + return this.mapOfInputRefToRexCall; + } + + @Override + public void implement(Implementor implementor) { + } + + private static class HiveSortRelFactory implements RelFactories.SortFactory { + + @Override + public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation, + RexNode offset, RexNode fetch) { + return new HiveSort(child.getCluster(), traits, child, collation, offset, fetch); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java new file mode 100644 index 0000000..53021ea --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import java.util.List; + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; +import org.apache.hadoop.hive.ql.plan.ColStatistics; + + +/** + * Relational expression representing a scan of a HiveDB collection. + * + *
+ * <p>
+ * Additional operations, such as filtering and projection, may be applied
+ * on top of this scan by planner rules.
+ *
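+ * The row count reported by this scan comes from Hive table statistics via
+ * {@link RelOptHiveTable#getRowCount()}; per-column statistics are exposed
+ * through {@link #getColStat}.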
+ */ +public class HiveTableScan extends TableScan implements HiveRelNode { + + /** + * Creates a HiveTableScan. + * + * @param cluster + * Cluster + * @param traitSet + * Traits + * @param table + * Table + * @param table + * HiveDB table + */ + public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, + RelDataType rowtype) { + super(cluster, TraitsUtil.getDefaultTraitSet(cluster), table); + assert getConvention() == HiveRelNode.CONVENTION; + } + + @Override + public RelNode copy(RelTraitSet traitSet, List inputs) { + assert inputs.isEmpty(); + return this; + } + + @Override + public RelOptCost computeSelfCost(RelOptPlanner planner) { + return HiveCost.FACTORY.makeZeroCost(); + } + + @Override + public void register(RelOptPlanner planner) { + + } + + @Override + public void implement(Implementor implementor) { + + } + + @Override + public double getRows() { + return ((RelOptHiveTable) table).getRowCount(); + } + + public List getColStat(List projIndxLst) { + return ((RelOptHiveTable) table).getColStat(projIndxLst); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java new file mode 100644 index 0000000..72226e7 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import java.util.List; + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.core.SetOp; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.sql.SqlKind; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode.Implementor; + +public class HiveUnion extends Union { + + public static final HiveUnionRelFactory UNION_REL_FACTORY = new HiveUnionRelFactory(); + + public HiveUnion(RelOptCluster cluster, RelTraitSet traits, List inputs) { + super(cluster, traits, inputs, true); + } + + @Override + public SetOp copy(RelTraitSet traitSet, List inputs, boolean all) { + return new HiveUnion(this.getCluster(), traitSet, inputs); + } + + public void implement(Implementor implementor) { + } + + private static class HiveUnionRelFactory implements RelFactories.SetOpFactory { + + @Override + public RelNode createSetOp(SqlKind kind, List inputs, boolean all) { + if (kind != SqlKind.UNION) { + throw new IllegalStateException("Expected to get Set operator of type Union. 
Found : " + kind); + } + return new HiveUnion(inputs.get(0).getCluster(), inputs.get(0).getTraitSet(), inputs); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java new file mode 100644 index 0000000..dcaf831 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.BitSet; +import java.util.List; +import java.util.ListIterator; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.plan.RelOptUtil.InputFinder; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.rules.FilterJoinRule; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; + +public abstract class HiveFilterJoinRule extends FilterJoinRule { + + public static final HiveFilterJoinRule FILTER_ON_JOIN = new HiveFilterJoinMergeRule(); + + public static final HiveFilterJoinRule JOIN = new HiveFilterJoinTransposeRule(); + + /** + * Creates a PushFilterPastJoinRule with an explicit root operand. + */ + protected HiveFilterJoinRule(RelOptRuleOperand operand, String id, boolean smart, + RelFactories.FilterFactory filterFactory, RelFactories.ProjectFactory projectFactory) { + super(operand, id, smart, filterFactory, projectFactory); + } + + /** + * Rule that tries to push filter expressions into a join condition and into + * the inputs of the join. 
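+ * For example, given an inner join with the filter "r1.y > 5 AND r1.x = r2.x"
+ * on top of it, "r1.y > 5" can be pushed to the left input and "r1.x = r2.x"
+ * into the join condition.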
+ */
+  public static class HiveFilterJoinMergeRule extends HiveFilterJoinRule {
+    public HiveFilterJoinMergeRule() {
+      super(RelOptRule.operand(Filter.class,
+          RelOptRule.operand(Join.class, RelOptRule.any())),
+          "HiveFilterJoinRule:filter", true, HiveFilter.DEFAULT_FILTER_FACTORY,
+          HiveProject.DEFAULT_PROJECT_FACTORY);
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+      Filter filter = call.rel(0);
+      Join join = call.rel(1);
+      super.perform(call, filter, join);
+    }
+  }
+
+  public static class HiveFilterJoinTransposeRule extends HiveFilterJoinRule {
+    public HiveFilterJoinTransposeRule() {
+      super(RelOptRule.operand(Join.class, RelOptRule.any()),
+          "HiveFilterJoinRule:no-filter", true, HiveFilter.DEFAULT_FILTER_FACTORY,
+          HiveProject.DEFAULT_PROJECT_FACTORY);
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+      Join join = call.rel(0);
+      super.perform(call, null, join);
+    }
+  }
+
+  /*
+   * Any predicates pushed down to joinFilters that aren't equality conditions:
+   * put them back in aboveFilters, because Hive does not support non-equijoin
+   * conditions.
+   */
+  @Override
+  protected void validateJoinFilters(List<RexNode> aboveFilters, List<RexNode> joinFilters,
+      Join join, JoinRelType joinType) {
+    if (joinType.equals(JoinRelType.INNER)) {
+      ListIterator<RexNode> filterIter = joinFilters.listIterator();
+      while (filterIter.hasNext()) {
+        RexNode exp = filterIter.next();
+
+        if (exp instanceof RexCall) {
+          RexCall c = (RexCall) exp;
+          boolean validHiveJoinFilter = false;
+
+          if ((c.getOperator().getKind() == SqlKind.EQUALS)) {
+            validHiveJoinFilter = true;
+            for (RexNode rn : c.getOperands()) {
+              // NOTE: Hive disallows projections from both the left & right side
+              // of the join condition. Example: Hive disallows
+              // (r1.x + r2.x) = (r1.y + r2.y) on the join condition.
+              if (filterRefersToBothSidesOfJoin(rn, join)) {
+                validHiveJoinFilter = false;
+                break;
+              }
+            }
+          } else if ((c.getOperator().getKind() == SqlKind.LESS_THAN)
+              || (c.getOperator().getKind() == SqlKind.GREATER_THAN)
+              || (c.getOperator().getKind() == SqlKind.LESS_THAN_OR_EQUAL)
+              || (c.getOperator().getKind() == SqlKind.GREATER_THAN_OR_EQUAL)) {
+            validHiveJoinFilter = true;
+            // NOTE: Hive disallows projections from both the left & right side
+            // of the join in inequality conditions. Example: Hive disallows
+            // (r1.x < r2.x) on the join condition.
+ if (filterRefersToBothSidesOfJoin(c, join)) { + validHiveJoinFilter = false; + } + } + + if (validHiveJoinFilter) + continue; + } + + aboveFilters.add(exp); + filterIter.remove(); + } + } + } + + private boolean filterRefersToBothSidesOfJoin(RexNode filter, Join j) { + boolean refersToBothSides = false; + + int joinNoOfProjects = j.getRowType().getFieldCount(); + ImmutableBitSet filterProjs = ImmutableBitSet.FROM_BIT_SET.apply( + new BitSet(joinNoOfProjects)); + ImmutableBitSet allLeftProjs = filterProjs.union( + ImmutableBitSet.range(0, j.getInput(0).getRowType().getFieldCount())); + ImmutableBitSet allRightProjs = filterProjs.union( + ImmutableBitSet.range(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects)); + + filterProjs = filterProjs.union(InputFinder.bits(filter)); + + if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs)) + refersToBothSides = true; + + return refersToBothSides; + } +} + +// End PushFilterPastJoinRule.java + diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java new file mode 100644 index 0000000..ba28055 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; + +public class HivePartitionPruneRule extends RelOptRule { + + HiveConf conf; + + public HivePartitionPruneRule(HiveConf conf) { + super(operand(HiveFilter.class, operand(HiveTableScan.class, none()))); + this.conf = conf; + } + + @Override + public void onMatch(RelOptRuleCall call) { + HiveFilter filter = call.rel(0); + HiveTableScan tScan = call.rel(1); + perform(call, filter, tScan); + } + + protected void perform(RelOptRuleCall call, Filter filter, + HiveTableScan tScan) { + + RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable(); + RexNode predicate = filter.getCondition(); + + Pair predicates = PartitionPrune + .extractPartitionPredicates(filter.getCluster(), hiveTable, predicate); + RexNode partColExpr = predicates.left; + hiveTable.computePartitionList(conf, partColExpr); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java new file mode 100644 index 0000000..8b90a15 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.rel.rules.ProjectMergeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; + +//Currently not used, turn this on later +public class HiveProjectMergeRule extends ProjectMergeRule { + public static final HiveProjectMergeRule INSTANCE = new HiveProjectMergeRule(); + + public HiveProjectMergeRule() { + super(true, HiveProject.DEFAULT_PROJECT_FACTORY); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java new file mode 100644 index 0000000..2fb9a52 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java @@ -0,0 +1,207 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+
+public class PartitionPrune {
+
+  /**
+   * Breaks the predicate into two pieces. The first piece is the expressions
+   * that only contain partition columns and can be used for partition pruning;
+   * the second piece is the predicates that are left.
+   *
+   * @param cluster
+   * @param hiveTable
+   * @param predicate
+   * @return a Pair of expressions, each of which may be null. The first
+   *         predicate contains only expressions over partition columns; the
+   *         second predicate contains the remaining predicates.
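+   *         For example, for a table partitioned on ds, the predicate
+   *         "ds = '2014-10-01' and uid > 10" is split into the pruning
+   *         expression "ds = '2014-10-01'" and the remaining predicate
+   *         "uid > 10".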
+ */ + public static Pair extractPartitionPredicates( + RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) { + RexNode partitionPruningPred = predicate + .accept(new ExtractPartPruningPredicate(cluster, hiveTable)); + RexNode remainingPred = predicate.accept(new ExtractRemainingPredicate( + cluster, partitionPruningPred)); + return new Pair(partitionPruningPred, remainingPred); + } + + public static class ExtractPartPruningPredicate extends + RexVisitorImpl { + + final RelOptHiveTable hiveTable; + final RelDataType rType; + final Set partCols; + final RelOptCluster cluster; + + public ExtractPartPruningPredicate(RelOptCluster cluster, + RelOptHiveTable hiveTable) { + super(true); + this.hiveTable = hiveTable; + rType = hiveTable.getRowType(); + List pfs = hiveTable.getHiveTableMD().getPartCols(); + partCols = new HashSet(); + for (FieldSchema pf : pfs) { + partCols.add(pf.getName()); + } + this.cluster = cluster; + } + + @Override + public RexNode visitLiteral(RexLiteral literal) { + return literal; + } + + @Override + public RexNode visitInputRef(RexInputRef inputRef) { + RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex()); + if (partCols.contains(f.getName())) { + return inputRef; + } else { + return null; + } + } + + @Override + public RexNode visitCall(RexCall call) { + if (!deep) { + return null; + } + + List args = new LinkedList(); + boolean argsPruned = false; + + GenericUDF hiveUDF = SqlFunctionConverter.getHiveUDF(call.getOperator(), + call.getType(), call.operands.size()); + if (hiveUDF != null && + !FunctionRegistry.isDeterministic(hiveUDF)) { + return null; + } + + for (RexNode operand : call.operands) { + RexNode n = operand.accept(this); + if (n != null) { + args.add(n); + } else { + argsPruned = true; + } + } + + if (call.getOperator() != SqlStdOperatorTable.AND) { + return argsPruned ? 
null : call; + } else { + if (args.size() == 0) { + return null; + } else if (args.size() == 1) { + return args.get(0); + } else { + return cluster.getRexBuilder().makeCall(call.getOperator(), args); + } + } + } + + } + + public static class ExtractRemainingPredicate extends RexVisitorImpl { + + List pruningPredicates; + final RelOptCluster cluster; + + public ExtractRemainingPredicate(RelOptCluster cluster, + RexNode partPruningExpr) { + super(true); + this.cluster = cluster; + pruningPredicates = new ArrayList(); + flattenPredicates(partPruningExpr); + } + + private void flattenPredicates(RexNode r) { + if (r instanceof RexCall + && ((RexCall) r).getOperator() == SqlStdOperatorTable.AND) { + for (RexNode c : ((RexCall) r).getOperands()) { + flattenPredicates(c); + } + } else { + pruningPredicates.add(r); + } + } + + @Override + public RexNode visitLiteral(RexLiteral literal) { + return literal; + } + + @Override + public RexNode visitInputRef(RexInputRef inputRef) { + return inputRef; + } + + @Override + public RexNode visitCall(RexCall call) { + if (!deep) { + return null; + } + + if (call.getOperator() != SqlStdOperatorTable.AND) { + if (pruningPredicates.contains(call)) { + return null; + } else { + return call; + } + } + + List args = new LinkedList(); + + for (RexNode operand : call.operands) { + RexNode n = operand.accept(this); + if (n != null) { + args.add(n); + } + } + + if (args.size() == 0) { + return null; + } else if (args.size() == 1) { + return args.get(0); + } else { + return cluster.getRexBuilder().makeCall(call.getOperator(), args); + } + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java new file mode 100644 index 0000000..b52779c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java @@ -0,0 +1,255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.type.SqlTypeUtil; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; + +public class FilterSelectivityEstimator extends RexVisitorImpl { + private final RelNode childRel; + private final double childCardinality; + + protected FilterSelectivityEstimator(RelNode childRel) { + super(true); + this.childRel = childRel; + this.childCardinality = RelMetadataQuery.getRowCount(childRel); + } + + public Double estimateSelectivity(RexNode predicate) { + return predicate.accept(this); + } + + public Double visitCall(RexCall call) { + if (!deep) { + return 1.0; + } + + /* + * Ignore any predicates on partition columns because we have already + * accounted for these in the Table row count. + */ + if (isPartitionPredicate(call, this.childRel)) { + return 1.0; + } + + Double selectivity = null; + SqlKind op = getOp(call); + + switch (op) { + case AND: { + selectivity = computeConjunctionSelectivity(call); + break; + } + + case OR: { + selectivity = computeDisjunctionSelectivity(call); + break; + } + + case NOT: + case NOT_EQUALS: { + selectivity = computeNotEqualitySelectivity(call); + break; + } + + case LESS_THAN_OR_EQUAL: + case GREATER_THAN_OR_EQUAL: + case LESS_THAN: + case GREATER_THAN: { + selectivity = ((double) 1 / (double) 3); + break; + } + + case IN: { + // TODO: 1) check for duplicates 2) We assume in clause values to be + // present in NDV which may not be correct (Range check can find it) 3) We + // assume values in NDV set is uniformly distributed over col values + // (account for skewness - histogram). + selectivity = computeFunctionSelectivity(call) * (call.operands.size() - 1); + if (selectivity <= 0.0) { + selectivity = 0.10; + } else if (selectivity >= 1.0) { + selectivity = 1.0; + } + break; + } + + default: + selectivity = computeFunctionSelectivity(call); + } + + return selectivity; + } + + /** + * NDV of "f1(x, y, z) != f2(p, q, r)" -> + * "(maxNDV(x,y,z,p,q,r) - 1)/maxNDV(x,y,z,p,q,r)". + *
+   * <p>
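+   * For example, with maxNDV(x,y,z,p,q,r) = 10, the estimated selectivity is
+   * (10 - 1)/10 = 0.9.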
+ * + * @param call + * @return + */ + private Double computeNotEqualitySelectivity(RexCall call) { + double tmpNDV = getMaxNDV(call); + + if (tmpNDV > 1) + return (tmpNDV - (double) 1) / tmpNDV; + else + return 1.0; + } + + /** + * Selectivity of f(X,y,z) -> 1/maxNDV(x,y,z). + *
+   * <p>
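+   * For example, if maxNDV(x,y,z) = 20, the estimated selectivity of
+   * "f(x,y,z) = literal" is 1/20 = 0.05.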
+   * Note that >, >=, <, <=, = ... are considered generic functions and use
+   * this method to find their selectivity.
+   *
+   * @param call
+   * @return
+   */
+  private Double computeFunctionSelectivity(RexCall call) {
+    return 1 / getMaxNDV(call);
+  }
+
+  /**
+   * Disjunction Selectivity -> (1 - (1-m1/n)(1-m2/n)) where n is the total
+   * number of tuples from the child and m1 and m2 are the expected number of
+   * tuples from each part of the disjunction predicate.
+   *
+   * <p>
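+   * For example, with n = 100 and disjunct selectivities 0.2 and 0.3 (so
+   * m1 = 20 and m2 = 30), the estimate is 1 - (1 - 20/100) * (1 - 30/100) = 0.44.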
+ * Note we compute m1. m2.. by applying selectivity of the disjunctive element + * on the cardinality from child. + * + * @param call + * @return + */ + private Double computeDisjunctionSelectivity(RexCall call) { + Double tmpCardinality; + Double tmpSelectivity; + double selectivity = 1; + + for (RexNode dje : call.getOperands()) { + tmpSelectivity = dje.accept(this); + if (tmpSelectivity == null) { + tmpSelectivity = 0.99; + } + tmpCardinality = childCardinality * tmpSelectivity; + + if (tmpCardinality > 1 && tmpCardinality < childCardinality) { + tmpSelectivity = (1 - tmpCardinality / childCardinality); + } else { + tmpSelectivity = 1.0; + } + + selectivity *= tmpSelectivity; + } + + if (selectivity < 0.0) + selectivity = 0.0; + + return (1 - selectivity); + } + + /** + * Selectivity of conjunctive predicate -> (selectivity of conjunctive + * element1) * (selectivity of conjunctive element2)... + * + * @param call + * @return + */ + private Double computeConjunctionSelectivity(RexCall call) { + Double tmpSelectivity; + double selectivity = 1; + + for (RexNode cje : call.getOperands()) { + tmpSelectivity = cje.accept(this); + if (tmpSelectivity != null) { + selectivity *= tmpSelectivity; + } + } + + return selectivity; + } + + private Double getMaxNDV(RexCall call) { + double tmpNDV; + double maxNDV = 1.0; + InputReferencedVisitor irv; + + for (RexNode op : call.getOperands()) { + if (op instanceof RexInputRef) { + tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, + ((RexInputRef) op).getIndex()); + if (tmpNDV > maxNDV) + maxNDV = tmpNDV; + } else { + irv = new InputReferencedVisitor(); + irv.apply(op); + for (Integer childProjIndx : irv.inputPosReferenced) { + tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, childProjIndx); + if (tmpNDV > maxNDV) + maxNDV = tmpNDV; + } + } + } + + return maxNDV; + } + + private boolean isPartitionPredicate(RexNode expr, RelNode r) { + if (r instanceof Project) { + expr = RelOptUtil.pushFilterPastProject(expr, (Project) r); + return isPartitionPredicate(expr, ((Project) r).getInput()); + } else if (r instanceof Filter) { + return isPartitionPredicate(expr, ((Filter) r).getInput()); + } else if (r instanceof HiveTableScan) { + RelOptHiveTable table = (RelOptHiveTable) ((HiveTableScan) r).getTable(); + ImmutableBitSet cols = RelOptUtil.InputFinder.bits(expr); + return table.containsPartitionColumnsOnly(cols); + } + return false; + } + + private SqlKind getOp(RexCall call) { + SqlKind op = call.getKind(); + + if (call.getKind().equals(SqlKind.OTHER_FUNCTION) + && SqlTypeUtil.inBooleanFamily(call.getType())) { + SqlOperator sqlOp = call.getOperator(); + String opName = (sqlOp != null) ? sqlOp.getName() : ""; + if (opName.equalsIgnoreCase("in")) { + op = SqlKind.IN; + } + } + + return op; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java new file mode 100644 index 0000000..1220401 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import java.util.List; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdDistinctRowCount; +import org.apache.calcite.rel.metadata.RelMdUtil; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +import com.google.common.collect.ImmutableList; + +public class HiveRelMdDistinctRowCount extends RelMdDistinctRowCount { + + private static final HiveRelMdDistinctRowCount INSTANCE = + new HiveRelMdDistinctRowCount(); + + public static final RelMetadataProvider SOURCE = ChainedRelMetadataProvider + .of(ImmutableList.of( + + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.DISTINCT_ROW_COUNT.method, INSTANCE), + + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.CUMULATIVE_COST.method, INSTANCE))); + + private HiveRelMdDistinctRowCount() { + } + + // Catch-all rule when none of the others apply. + @Override + public Double getDistinctRowCount(RelNode rel, ImmutableBitSet groupKey, + RexNode predicate) { + if (rel instanceof HiveTableScan) { + return getDistinctRowCount((HiveTableScan) rel, groupKey, predicate); + } + /* + * For now use Calcite' default formulas for propagating NDVs up the Query + * Tree. 
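+     * (i.e. the generic rules in RelMdDistinctRowCount for Filter, Project,
+     * Join, etc.)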
+ */ + return super.getDistinctRowCount(rel, groupKey, predicate); + } + + private Double getDistinctRowCount(HiveTableScan htRel, ImmutableBitSet groupKey, + RexNode predicate) { + List projIndxLst = HiveCalciteUtil + .translateBitSetToProjIndx(groupKey); + List colStats = htRel.getColStat(projIndxLst); + Double noDistinctRows = 1.0; + for (ColStatistics cStat : colStats) { + noDistinctRows *= cStat.getCountDistint(); + } + + return Math.min(noDistinctRows, htRel.getRows()); + } + + public static Double getDistinctRowCount(RelNode r, int indx) { + ImmutableBitSet bitSetOfRqdProj = ImmutableBitSet.of(indx); + return RelMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r + .getCluster().getRexBuilder().makeLiteral(true)); + } + + @Override + public Double getDistinctRowCount(Join rel, ImmutableBitSet groupKey, + RexNode predicate) { + if (rel instanceof HiveJoin) { + HiveJoin hjRel = (HiveJoin) rel; + //TODO: Improve this + if (hjRel.isLeftSemiJoin()) { + return RelMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey, + rel.getCluster().getRexBuilder().makeLiteral(true)); + } else { + return RelMdUtil.getJoinDistinctRowCount(rel, rel.getJoinType(), + groupKey, predicate, true); + } + } + + return RelMetadataQuery.getDistinctRowCount(rel, groupKey, predicate); + } + + /* + * Favor Broad Plans over Deep Plans. + */ + public RelOptCost getCumulativeCost(HiveJoin rel) { + RelOptCost cost = RelMetadataQuery.getNonCumulativeCost(rel); + List inputs = rel.getInputs(); + RelOptCost maxICost = HiveCost.ZERO; + for (RelNode input : inputs) { + RelOptCost iCost = RelMetadataQuery.getCumulativeCost(input); + if (maxICost.isLt(iCost)) { + maxICost = iCost; + } + } + return cost.plus(maxICost); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java new file mode 100644 index 0000000..dabbe28 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -0,0 +1,439 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelVisitor; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdRowCount; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; + +public class HiveRelMdRowCount extends RelMdRowCount { + + protected static final Log LOG = LogFactory.getLog(HiveRelMdRowCount.class.getName()); + + + public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider + .reflectiveSource(BuiltInMethod.ROW_COUNT.method, new HiveRelMdRowCount()); + + protected HiveRelMdRowCount() { + super(); + } + + public Double getRowCount(Join join) { + PKFKRelationInfo pkfk = analyzeJoinForPKFK(join); + if (pkfk != null) { + double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); + selectivity = Math.min(1.0, selectivity); + if (LOG.isDebugEnabled()) { + LOG.debug("Identified Primary - Foreign Key relation:"); + LOG.debug(RelOptUtil.toString(join)); + LOG.debug(pkfk); + } + return pkfk.fkInfo.rowCount * selectivity; + } + return join.getRows(); + } + + public Double getRowCount(SemiJoin rel) { + PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel); + if (pkfk != null) { + double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); + selectivity = Math.min(1.0, selectivity); + if (LOG.isDebugEnabled()) { + LOG.debug("Identified Primary - Foreign Key relation:"); + LOG.debug(RelOptUtil.toString(rel)); + LOG.debug(pkfk); + } + return pkfk.fkInfo.rowCount * selectivity; + } + return super.getRowCount(rel); + } + + static class PKFKRelationInfo { + public final int fkSide; + public final double ndvScalingFactor; + public final FKSideInfo fkInfo; + public final PKSideInfo pkInfo; + public final boolean isPKSideSimple; + + PKFKRelationInfo(int fkSide, + FKSideInfo fkInfo, + PKSideInfo pkInfo, + double ndvScalingFactor, + boolean isPKSideSimple) { + this.fkSide = fkSide; + this.fkInfo = fkInfo; + this.pkInfo = pkInfo; + this.ndvScalingFactor = ndvScalingFactor; + this.isPKSideSimple = isPKSideSimple; + } + + public String toString() { + return String.format( + "Primary - Foreign Key join:\n\tfkSide = %d\n\tFKInfo:%s\n" + + "\tPKInfo:%s\n\tisPKSideSimple:%s\n\tNDV Scaling Factor:%.2f\n", + fkSide, + fkInfo, + pkInfo, + isPKSideSimple, + ndvScalingFactor); + } + } + + static class FKSideInfo { + public final double rowCount; + public final double 
distinctCount; + public FKSideInfo(double rowCount, double distinctCount) { + this.rowCount = rowCount; + this.distinctCount = distinctCount; + } + + public String toString() { + return String.format("FKInfo(rowCount=%.2f,ndv=%.2f)", rowCount, distinctCount); + } + } + + static class PKSideInfo extends FKSideInfo { + public final double selectivity; + public PKSideInfo(double rowCount, double distinctCount, double selectivity) { + super(rowCount, distinctCount); + this.selectivity = selectivity; + } + + public String toString() { + return String.format("PKInfo(rowCount=%.2f,ndv=%.2f,selectivity=%.2f)", rowCount, distinctCount, selectivity); + } + } + + /* + * For T1 join T2 on T1.x = T2.y, if we identify 'y' as a key of T2 then we can + * infer the join cardinality as: rowCount(T1) * selectivity(T2), i.e. this is + * like a SemiJoin where T1 (the Fact side/FK side) is filtered by a factor + * based on the selectivity of the PK/Dim table side. + * + * 1. If both T1.x and T2.y are keys then use the larger one as the PK side. + * 2. In case of outer joins: a) The FK side should be the null-preserving + * side. It doesn't make sense to apply this heuristic in case of Dim loj Fact + * or Fact roj Dim. b) The selectivity factor applied on the Fact table should + * be 1. + */ + public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) { + + RelNode left = joinRel.getInputs().get(0); + RelNode right = joinRel.getInputs().get(1); + + final List<RexNode> initJoinFilters = RelOptUtil.conjunctions(joinRel + .getCondition()); + + /* + * No joining condition. + */ + if (initJoinFilters.isEmpty()) { + return null; + } + + List<RexNode> leftFilters = new ArrayList<RexNode>(); + List<RexNode> rightFilters = new ArrayList<RexNode>(); + List<RexNode> joinFilters = new ArrayList<RexNode>(initJoinFilters); + + // @todo: remove this. 8/28/14 hb + // for now adding because RelOptUtil.classifyFilters has an assertion about + // column counts that is not true for semiJoins. + if (joinRel instanceof SemiJoin) { + return null; + } + + RelOptUtil.classifyFilters(joinRel, joinFilters, joinRel.getJoinType(), + false, !joinRel.getJoinType().generatesNullsOnRight(), !joinRel + .getJoinType().generatesNullsOnLeft(), joinFilters, leftFilters, + rightFilters); + + Pair<Integer, Integer> joinCols = canHandleJoin(joinRel, leftFilters, + rightFilters, joinFilters); + if (joinCols == null) { + return null; + } + int leftColIdx = joinCols.left; + int rightColIdx = joinCols.right; + + RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); + RexNode leftPred = RexUtil + .composeConjunction(rexBuilder, leftFilters, true); + RexNode rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters, + true); + ImmutableBitSet lBitSet = ImmutableBitSet.of(leftColIdx); + ImmutableBitSet rBitSet = ImmutableBitSet.of(rightColIdx); + + /* + * If the form is Dim loj Fact or Fact roj Dim or Dim semij Fact then return + * null. + */ + boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel + .getJoinType() == JoinRelType.RIGHT) + && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left); + boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel + .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right); + + if (!leftIsKey && !rightIsKey) { + return null; + } + + double leftRowCount = RelMetadataQuery.getRowCount(left); + double rightRowCount = RelMetadataQuery.getRowCount(right); + + if (leftIsKey && rightIsKey) { + if (rightRowCount < leftRowCount) { + leftIsKey = false; + } + } + + int pkSide = leftIsKey ? 0 : rightIsKey ? 
1 : -1; + + boolean isPKSideSimpleTree = pkSide != -1 ? + IsSimpleTreeOnJoinKey.check( + pkSide == 0 ? left : right, + pkSide == 0 ? leftColIdx : rightColIdx) : false; + + double leftNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(left, lBitSet, leftPred) : -1; + double rightNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(right, rBitSet, rightPred) : -1; + + /* + * If the NDVs of the PK and FK sides don't match, and the PK side is a filter + * on the key column, then scale the NDV on the FK side. + * + * As described by Peter Boncz: http://databasearchitects.blogspot.com/ + * in such cases we can be off by a large margin in the join cardinality + * estimate. The example he provides is the join of StoreSales and DateDim + * on the TPCDS dataset. Since the DateDim is populated for 20 years into + * the future, while the StoreSales only has 5 years worth of data, there + * are 40 times fewer distinct dates in StoreSales. + * + * In general it is hard to infer the range for the foreign key on an + * arbitrary expression. For example, the NDV for DayOfWeek is the same + * irrespective of the NDV on the number of unique days, whereas the + * NDV of Quarters has the same ratio as the NDV on the keys. + * + * But for expressions that apply only on columns that have the same NDV + * as the key (implying that they are alternate keys) we can apply the + * ratio. So in the case of StoreSales - DateDim joins, for a predicate on + * the d_date column we can apply the scaling factor. + */ + double ndvScalingFactor = 1.0; + if (isPKSideSimpleTree) { + ndvScalingFactor = pkSide == 0 ? leftNDV / rightNDV : rightNDV / leftNDV; + } + + if (pkSide == 0) { + FKSideInfo fkInfo = new FKSideInfo(rightRowCount, + rightNDV); + double pkSelectivity = pkSelectivity(joinRel, true, left, leftRowCount); + PKSideInfo pkInfo = new PKSideInfo(leftRowCount, + leftNDV, + joinRel.getJoinType().generatesNullsOnRight() ? 1.0 : + pkSelectivity); + + return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); + } + + if (pkSide == 1) { + FKSideInfo fkInfo = new FKSideInfo(leftRowCount, + leftNDV); + double pkSelectivity = pkSelectivity(joinRel, false, right, rightRowCount); + PKSideInfo pkInfo = new PKSideInfo(rightRowCount, + rightNDV, + joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 : + pkSelectivity); + + return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); + } + + return null; + } + + private static double pkSelectivity(Join joinRel, boolean leftChild, + RelNode child, + double childRowCount) { + if ((leftChild && joinRel.getJoinType().generatesNullsOnRight()) || + (!leftChild && joinRel.getJoinType().generatesNullsOnLeft())) { + return 1.0; + } else { + HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true); + if (tScan != null) { + double tRowCount = RelMetadataQuery.getRowCount(tScan); + return childRowCount / tRowCount; + } else { + return 1.0; + } + } + } + + private static boolean isKey(ImmutableBitSet c, RelNode rel) { + boolean isKey = false; + Set<ImmutableBitSet> keys = RelMetadataQuery.getUniqueKeys(rel); + if (keys != null) { + for (ImmutableBitSet key : keys) { + if (key.equals(c)) { + isKey = true; + break; + } + } + } + return isKey; + } + + /* + * 1. The join condition must be an equality predicate. + * 2. Both sides must reference exactly one column. + * 3. If needed, flip the columns. 
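+ * + * Illustrative examples (editor's sketch, not part of the original patch), + * assuming T1(x, z) JOIN T2(y, w): + * T1.x = T2.y -> handled; returns (idx(x), idx(y)) + * T2.y = T1.x -> handled; the column references are flipped back + * T1.x = T2.y AND T1.z = T2.w -> rejected (more than one join filter) + * T1.x + T1.z = T2.y -> rejected (one side references two columns) + * T1.x > T2.y -> rejected (top-level operator is not EQUALS) 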
+ */ + private static Pair canHandleJoin(Join joinRel, + List leftFilters, List rightFilters, + List joinFilters) { + + /* + * If after classifying filters there is more than 1 joining predicate, we + * don't handle this. Return null. + */ + if (joinFilters.size() != 1) { + return null; + } + + RexNode joinCond = joinFilters.get(0); + + int leftColIdx; + int rightColIdx; + + if (!(joinCond instanceof RexCall)) { + return null; + } + + if (((RexCall) joinCond).getOperator() != SqlStdOperatorTable.EQUALS) { + return null; + } + + ImmutableBitSet leftCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(0)); + ImmutableBitSet rightCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(1)); + + if (leftCols.cardinality() != 1 || rightCols.cardinality() != 1 ) { + return null; + } + + int nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size(); + int nFieldsRight = joinRel.getRight().getRowType().getFieldList().size(); + int nSysFields = joinRel.getSystemFieldList().size(); + ImmutableBitSet rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft, + nSysFields + nFieldsLeft + nFieldsRight); + /* + * flip column references if join condition specified in reverse order to + * join sources. + */ + if (rightFieldsBitSet.contains(leftCols)) { + ImmutableBitSet t = leftCols; + leftCols = rightCols; + rightCols = t; + } + + leftColIdx = leftCols.nextSetBit(0) - nSysFields; + rightColIdx = rightCols.nextSetBit(0) - (nSysFields + nFieldsLeft); + + return new Pair(leftColIdx, rightColIdx); + } + + private static class IsSimpleTreeOnJoinKey extends RelVisitor { + + int joinKey; + boolean simpleTree; + + static boolean check(RelNode r, int joinKey) { + IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey); + v.go(r); + return v.simpleTree; + } + + IsSimpleTreeOnJoinKey(int joinKey) { + super(); + this.joinKey = joinKey; + simpleTree = true; + } + + @Override + public void visit(RelNode node, int ordinal, RelNode parent) { + + if (node instanceof HepRelVertex) { + node = ((HepRelVertex) node).getCurrentRel(); + } + + if (node instanceof TableScan) { + simpleTree = true; + } else if (node instanceof Project) { + simpleTree = isSimple((Project) node); + } else if (node instanceof Filter) { + simpleTree = isSimple((Filter) node); + } else { + simpleTree = false; + } + + if (simpleTree) { + super.visit(node, ordinal, parent); + } + } + + private boolean isSimple(Project project) { + RexNode r = project.getProjects().get(joinKey); + if (r instanceof RexInputRef) { + joinKey = ((RexInputRef) r).getIndex(); + return true; + } + return false; + } + + private boolean isSimple(Filter filter) { + ImmutableBitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition()); + return isKey(condBits, filter); + } + + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java new file mode 100644 index 0000000..960ec40 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -0,0 +1,242 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdSelectivity; +import org.apache.calcite.rel.metadata.RelMdUtil; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; + +import com.google.common.collect.ImmutableMap; + +public class HiveRelMdSelectivity extends RelMdSelectivity { + public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.SELECTIVITY.method, + new HiveRelMdSelectivity()); + + protected HiveRelMdSelectivity() { + super(); + } + + public Double getSelectivity(HiveTableScan t, RexNode predicate) { + if (predicate != null) { + FilterSelectivityEstimator filterSelEstmator = new FilterSelectivityEstimator(t); + return filterSelEstmator.estimateSelectivity(predicate); + } + + return 1.0; + } + + public Double getSelectivity(HiveJoin j, RexNode predicate) { + if (j.getJoinType().equals(JoinRelType.INNER)) { + return computeInnerJoinSelectivity(j, predicate); + } + return 1.0; + } + + private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) { + double ndvCrossProduct = 1; + Pair predInfo = + getCombinedPredicateForJoin(j, predicate); + if (!predInfo.getKey()) { + return + new FilterSelectivityEstimator(j). + estimateSelectivity(predInfo.getValue()); + } + + RexNode combinedPredicate = predInfo.getValue(); + JoinPredicateInfo jpi = JoinPredicateInfo.constructJoinPredicateInfo(j, + combinedPredicate); + ImmutableMap.Builder colStatMapBuilder = ImmutableMap + .builder(); + ImmutableMap colStatMap; + int rightOffSet = j.getLeft().getRowType().getFieldCount(); + + // 1. Update Col Stats Map with col stats for columns from left side of + // Join which are part of join keys + for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) { + colStatMapBuilder.put(ljk, + HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), ljk)); + } + + // 2. 
Update Col Stats Map with col stats for columns from the right side of + // the Join which are part of the join keys + for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) { + colStatMapBuilder.put(rjk + rightOffSet, + HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), rjk)); + } + colStatMap = colStatMapBuilder.build(); + + // 3. Walk through the join condition, building NDV for selectivity. + // The NDV of the join cannot exceed the cardinality of the cross join. + List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements(); + int noOfPE = peLst.size(); + if (noOfPE > 0) { + ndvCrossProduct = exponentialBackoff(peLst, colStatMap); + + if (j.isLeftSemiJoin()) + ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()), + ndvCrossProduct); + else + ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()) + * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct); + } + + // 4. Join Selectivity = 1/NDV + return (1 / ndvCrossProduct); + } + + // 3.2 If there is more than one conjunctive predicate element, walk + // through them one by one and compute the cross product of NDVs. The + // cross product is computed by multiplying the largest NDV of all of the + // conjunctive predicate elements with the degraded NDV of the rest of the + // conjunctive predicate elements. NDV is degraded using a log function. + // Finally the ndvCrossProduct is fenced at the join cross product to + // ensure that the NDV cannot exceed the worst-case join cardinality.
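+ // + // Worked example (editor's sketch, not from the original comment): for + // three equi-join predicate elements with NDVs 1000, 100 and 10, the + // largest NDV is kept as-is and the rest are log-degraded: + // ndvCrossProduct = 1000 * ln(100) * ln(10) ~= 1000 * 4.61 * 2.30 ~= 10,600 + // rather than the naive cross product 1000 * 100 * 10 = 1,000,000. + // (exponentialBackoff, which computeInnerJoinSelectivity actually calls, + // would instead give 1000 * 100^(1/2) * 10^(1/4) ~= 17,783.)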
+ // The NDV of a conjunctive predicate element is the max NDV of all + // arguments to its lhs and rhs expressions. + // NDV(JoinCondition) = min(left cardinality * right cardinality, + // ndvCrossProduct(JoinCondition)) + // ndvCrossProduct(JoinCondition) = ndv(pex) * log(ndv(pe1)) * log(ndv(pe2)) + // where pex is the predicate element of the join condition with max NDV. + // ndv(pe) = max(NDV(left.Expr), NDV(right.Expr)) + // NDV(expr) = max(NDV(expr args)) + protected double logSmoothing(List<JoinLeafPredicateInfo> peLst, + ImmutableMap<Integer, Double> colStatMap) { + int noOfPE = peLst.size(); + double ndvCrossProduct = getMaxNDVForJoinSelectivity(peLst.get(0), colStatMap); + if (noOfPE > 1) { + double maxNDVSoFar = ndvCrossProduct; + double ndvToBeSmoothed; + double tmpNDV; + + for (int i = 1; i < noOfPE; i++) { + tmpNDV = getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap); + if (tmpNDV > maxNDVSoFar) { + ndvToBeSmoothed = maxNDVSoFar; + maxNDVSoFar = tmpNDV; + ndvCrossProduct = (ndvCrossProduct / ndvToBeSmoothed) * tmpNDV; + } else { + ndvToBeSmoothed = tmpNDV; + } + // TODO: revisit the fence + if (ndvToBeSmoothed > 3) + ndvCrossProduct *= Math.log(ndvToBeSmoothed); + else + ndvCrossProduct *= ndvToBeSmoothed; + } + } + return ndvCrossProduct; + } + + /* + * a) Order predicates by NDV in descending order. b) ndvCrossProduct = + * ndv(pe0) * ndv(pe1)^(1/2) * ndv(pe2)^(1/4) * ndv(pe3)^(1/8) ... + */ + protected double exponentialBackoff(List<JoinLeafPredicateInfo> peLst, + ImmutableMap<Integer, Double> colStatMap) { + int noOfPE = peLst.size(); + List<Double> ndvs = new ArrayList<Double>(noOfPE); + for (int i = 0; i < noOfPE; i++) { + ndvs.add(getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap)); + } + Collections.sort(ndvs); + Collections.reverse(ndvs); + double ndvCrossProduct = 1.0; + for (int i = 0; i < ndvs.size(); i++) { + double n = Math.pow(ndvs.get(i), Math.pow(1 / 2.0, i)); + ndvCrossProduct *= n; + } + return ndvCrossProduct; + } + + /** + * + * @param j + * @param additionalPredicate + * @return if the given predicate is just the join condition, return + * (true, joinCond); else return (false, minusPred) + */ + private Pair<Boolean, RexNode> getCombinedPredicateForJoin(HiveJoin j, RexNode additionalPredicate) { + RexNode minusPred = RelMdUtil.minusPreds(j.getCluster().getRexBuilder(), additionalPredicate, + j.getCondition()); + + if (minusPred != null) { + List<RexNode> minusList = new ArrayList<RexNode>(); + minusList.add(j.getCondition()); + minusList.add(minusPred); + + return new Pair<Boolean, RexNode>(false, minusPred); + } + + return new Pair<Boolean, RexNode>(true, j.getCondition()); + } + + /** + * Compute Max NDV to determine Join Selectivity. 
+ * + * @param jlpi + * @param colStatMap + * Immutable Map of Projection Index (in Join Schema) to Column Stat + * @param rightProjOffSet + * @return + */ + private static Double getMaxNDVForJoinSelectivity(JoinLeafPredicateInfo jlpi, + ImmutableMap colStatMap) { + Double maxNDVSoFar = 1.0; + + maxNDVSoFar = getMaxNDVFromProjections(colStatMap, + jlpi.getProjsFromLeftPartOfJoinKeysInJoinSchema(), maxNDVSoFar); + maxNDVSoFar = getMaxNDVFromProjections(colStatMap, + jlpi.getProjsFromRightPartOfJoinKeysInJoinSchema(), maxNDVSoFar); + + return maxNDVSoFar; + } + + private static Double getMaxNDVFromProjections(Map colStatMap, + Set projectionSet, Double defaultMaxNDV) { + Double colNDV = null; + Double maxNDVSoFar = defaultMaxNDV; + + for (Integer projIndx : projectionSet) { + colNDV = colStatMap.get(projIndx); + if (colNDV > maxNDVSoFar) + maxNDVSoFar = colNDV; + } + + return maxNDVSoFar; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java new file mode 100644 index 0000000..95515b2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import java.util.BitSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.metadata.BuiltInMetadata; +import org.apache.calcite.rel.metadata.Metadata; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdUniqueKeys; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.BitSets; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +import com.google.common.base.Function; + +public class HiveRelMdUniqueKeys { + + public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider + .reflectiveSource(BuiltInMethod.UNIQUE_KEYS.method, + new HiveRelMdUniqueKeys()); + + /* + * Infer uniqueness if: rowCount(col) = ndv(col). + * TBD for numerics: max(col) - min(col) = rowCount(col). + * + * Why are we intercepting Project and not TableScan? Because if we + * have a method for TableScan, it will not know which columns to check for. + * Inferring uniqueness for all columns is very expensive right now. The flip + * side of doing this is that it only works post field trimming. + */ + public Set<ImmutableBitSet> getUniqueKeys(Project rel, boolean ignoreNulls) { + + HiveTableScan tScan = getTableScan(rel.getInput(), false); + + if (tScan == null) { + Function<RelNode, Metadata> fn = RelMdUniqueKeys.SOURCE.apply( + rel.getClass(), BuiltInMetadata.UniqueKeys.class); + return ((BuiltInMetadata.UniqueKeys) fn.apply(rel)) + .getUniqueKeys(ignoreNulls); + } + + Map<Integer, Integer> posMap = new HashMap<Integer, Integer>(); + int projectPos = 0; + int colStatsPos = 0; + + BitSet projectedCols = new BitSet(); + for (RexNode r : rel.getProjects()) { + if (r instanceof RexInputRef) { + projectedCols.set(((RexInputRef) r).getIndex()); + posMap.put(colStatsPos, projectPos); + colStatsPos++; + } + projectPos++; + } + + double numRows = tScan.getRows(); + List<ColStatistics> colStats = tScan.getColStat(BitSets + .toList(projectedCols)); + Set<ImmutableBitSet> keys = new HashSet<ImmutableBitSet>(); + + colStatsPos = 0; + for (ColStatistics cStat : colStats) { + boolean isKey = false; + if (cStat.getCountDistint() >= numRows) { + isKey = true; + } + if (!isKey && cStat.getRange() != null && + cStat.getRange().maxValue != null && + cStat.getRange().minValue != null) { + double r = cStat.getRange().maxValue.doubleValue() - + cStat.getRange().minValue.doubleValue() + 1; + isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON); + } + if (isKey) { + ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos)); + keys.add(key); + } + colStatsPos++; + } + + return keys; + } + + /* + * Traverse a path of Filter and Project operators to get to the TableScan. + * When computing unique keys, stop on reaching a Project; it will be handled + * by the invocation on that Project. + * When getting the base row count of a path, keep going past a Project. 
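+ * + * Illustrative walk (editor's sketch, not part of the original patch): + * getTableScan(Filter(HiveTableScan), false) returns the scan; + * getTableScan(Project(HiveTableScan), false) returns null, leaving the + * inner Project to answer its own getUniqueKeys call; + * getTableScan(Project(Filter(HiveTableScan)), true) walks past both + * operators and returns the scan. 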
+ */ + static HiveTableScan getTableScan(RelNode r, boolean traverseProject) { + + while (r != null && !(r instanceof HiveTableScan)) { + if (r instanceof HepRelVertex) { + r = ((HepRelVertex) r).getCurrentRel(); + } else if (r instanceof Filter) { + r = ((Filter) r).getInput(); + } else if (traverseProject && r instanceof Project) { + r = ((Project) r).getInput(); + } else { + r = null; + } + } + return r == null ? null : (HiveTableScan) r; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java new file mode 100644 index 0000000..e6e6fe3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java @@ -0,0 +1,254 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Calendar; + +import org.apache.calcite.avatica.ByteString; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.ParseDriver; + +class ASTBuilder { + + static ASTBuilder construct(int tokenType, String text) { + ASTBuilder b = new ASTBuilder(); + b.curr = createAST(tokenType, text); + return b; + } + + static ASTNode createAST(int tokenType, String text) { + return (ASTNode) ParseDriver.adaptor.create(tokenType, text); + } + + static ASTNode destNode() { + return ASTBuilder + .construct(HiveParser.TOK_DESTINATION, "TOK_DESTINATION") + .add( + ASTBuilder.construct(HiveParser.TOK_DIR, "TOK_DIR").add(HiveParser.TOK_TMP_FILE, + "TOK_TMP_FILE")).node(); + } + + static ASTNode table(TableScan scan) { + RelOptHiveTable hTbl = (RelOptHiveTable) scan.getTable(); + ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_TABREF, "TOK_TABREF").add( + ASTBuilder.construct(HiveParser.TOK_TABNAME, "TOK_TABNAME") + .add(HiveParser.Identifier, hTbl.getHiveTableMD().getDbName()) + .add(HiveParser.Identifier, hTbl.getHiveTableMD().getTableName())); + + // NOTE: Calcite considers tbls to be equal if their names are the same. Hence + // we need to provide Calcite the fully qualified table name (dbname.tblname) + // and not the user provided aliases. 
+ // However in HIVE DB name can not appear in select list; in case of join + // where table names differ only in DB name, Hive would require user + // introducing explicit aliases for tbl. + b.add(HiveParser.Identifier, hTbl.getTableAlias()); + return b.node(); + } + + static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond, + boolean semiJoin) { + ASTBuilder b = null; + + switch (joinType) { + case INNER: + if (semiJoin) { + b = ASTBuilder.construct(HiveParser.TOK_LEFTSEMIJOIN, "TOK_LEFTSEMIJOIN"); + } else { + b = ASTBuilder.construct(HiveParser.TOK_JOIN, "TOK_JOIN"); + } + break; + case LEFT: + b = ASTBuilder.construct(HiveParser.TOK_LEFTOUTERJOIN, "TOK_LEFTOUTERJOIN"); + break; + case RIGHT: + b = ASTBuilder.construct(HiveParser.TOK_RIGHTOUTERJOIN, "TOK_RIGHTOUTERJOIN"); + break; + case FULL: + b = ASTBuilder.construct(HiveParser.TOK_FULLOUTERJOIN, "TOK_FULLOUTERJOIN"); + break; + } + + b.add(left).add(right).add(cond); + return b.node(); + } + + static ASTNode subQuery(ASTNode qry, String alias) { + return ASTBuilder.construct(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY").add(qry) + .add(HiveParser.Identifier, alias).node(); + } + + static ASTNode qualifiedName(String tableName, String colName) { + ASTBuilder b = ASTBuilder + .construct(HiveParser.DOT, ".") + .add( + ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add( + HiveParser.Identifier, tableName)).add(HiveParser.Identifier, colName); + return b.node(); + } + + static ASTNode unqualifiedName(String colName) { + ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add( + HiveParser.Identifier, colName); + return b.node(); + } + + static ASTNode where(ASTNode cond) { + return ASTBuilder.construct(HiveParser.TOK_WHERE, "TOK_WHERE").add(cond).node(); + } + + static ASTNode having(ASTNode cond) { + return ASTBuilder.construct(HiveParser.TOK_HAVING, "TOK_HAVING").add(cond).node(); + } + + static ASTNode limit(Object value) { + return ASTBuilder.construct(HiveParser.TOK_LIMIT, "TOK_LIMIT") + .add(HiveParser.Number, value.toString()).node(); + } + + static ASTNode selectExpr(ASTNode expr, String alias) { + return ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR").add(expr) + .add(HiveParser.Identifier, alias).node(); + } + + static ASTNode literal(RexLiteral literal) { + return literal(literal, false); + } + + static ASTNode literal(RexLiteral literal, boolean useTypeQualInLiteral) { + Object val = null; + int type = 0; + SqlTypeName sqlType = literal.getType().getSqlTypeName(); + + switch (sqlType) { + case BINARY: + ByteString bs = (ByteString) literal.getValue(); + val = bs.byteAt(0); + type = HiveParser.BigintLiteral; + break; + case TINYINT: + if (useTypeQualInLiteral) { + val = literal.getValue3() + "Y"; + } else { + val = literal.getValue3(); + } + type = HiveParser.TinyintLiteral; + break; + case SMALLINT: + if (useTypeQualInLiteral) { + val = literal.getValue3() + "S"; + } else { + val = literal.getValue3(); + } + type = HiveParser.SmallintLiteral; + break; + case INTEGER: + val = literal.getValue3(); + type = HiveParser.BigintLiteral; + break; + case BIGINT: + if (useTypeQualInLiteral) { + val = literal.getValue3() + "L"; + } else { + val = literal.getValue3(); + } + type = HiveParser.BigintLiteral; + break; + case DOUBLE: + val = literal.getValue3() + "D"; + type = HiveParser.Number; + break; + case DECIMAL: + val = literal.getValue3() + "BD"; + type = HiveParser.DecimalLiteral; + break; + case FLOAT: + case REAL: + val = 
literal.getValue3(); + type = HiveParser.Number; + break; + case VARCHAR: + case CHAR: + val = literal.getValue3(); + String escapedVal = BaseSemanticAnalyzer.escapeSQLString(String.valueOf(val)); + type = HiveParser.StringLiteral; + val = "'" + escapedVal + "'"; + break; + case BOOLEAN: + val = literal.getValue3(); + type = ((Boolean) val).booleanValue() ? HiveParser.KW_TRUE : HiveParser.KW_FALSE; + break; + case DATE: { + val = literal.getValue(); + type = HiveParser.TOK_DATELITERAL; + DateFormat df = new SimpleDateFormat("yyyy-MM-dd"); + val = df.format(((Calendar) val).getTime()); + val = "'" + val + "'"; + } + break; + case TIME: + case TIMESTAMP: { + val = literal.getValue(); + type = HiveParser.TOK_TIMESTAMPLITERAL; + DateFormat df = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss"); + val = df.format(((Calendar) val).getTime()); + val = "'" + val + "'"; + } + break; + case NULL: + type = HiveParser.TOK_NULL; + break; + + default: + throw new RuntimeException("Unsupported Type: " + sqlType); + } + + return (ASTNode) ParseDriver.adaptor.create(type, String.valueOf(val)); + } + + ASTNode curr; + + ASTNode node() { + return curr; + } + + ASTBuilder add(int tokenType, String text) { + ParseDriver.adaptor.addChild(curr, createAST(tokenType, text)); + return this; + } + + ASTBuilder add(ASTBuilder b) { + ParseDriver.adaptor.addChild(curr, b.curr); + return this; + } + + ASTBuilder add(ASTNode n) { + if (n != null) { + ParseDriver.adaptor.addChild(curr, n); + } + return this; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java new file mode 100644 index 0000000..c02a65e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -0,0 +1,668 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelVisitor; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexFieldCollation; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.rex.RexWindow; +import org.apache.calcite.rex.RexWindowBound; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.BitSets; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.ParseDriver; + +import com.google.common.collect.Iterables; + +public class ASTConverter { + private static final Log LOG = LogFactory.getLog(ASTConverter.class); + + private RelNode root; + private HiveAST hiveAST; + private RelNode from; + private Filter where; + private Aggregate groupBy; + private Filter having; + private Project select; + private Sort order; + private Sort limit; + + private Schema schema; + + private long derivedTableCount; + + ASTConverter(RelNode root, long dtCounterInitVal) { + this.root = root; + hiveAST = new HiveAST(); + this.derivedTableCount = dtCounterInitVal; + } + + public static ASTNode convert(final RelNode relNode, List resultSchema) + throws CalciteSemanticException { + RelNode root = PlanModifierForASTConv.convertOpTree(relNode, resultSchema); + ASTConverter c = new ASTConverter(root, 0); + return c.convert(); + } + + private ASTNode convert() { + /* + * 1. Walk RelNode Graph; note from, where, gBy.. nodes. + */ + new QBVisitor().go(root); + + /* + * 2. convert from node. + */ + QueryBlockInfo qb = convertSource(from); + schema = qb.schema; + hiveAST.from = ASTBuilder.construct(HiveParser.TOK_FROM, "TOK_FROM").add(qb.ast).node(); + + /* + * 3. convert filterNode + */ + if (where != null) { + ASTNode cond = where.getCondition().accept(new RexVisitor(schema)); + hiveAST.where = ASTBuilder.where(cond); + } + + /* + * 4. 
GBy + */ + if (groupBy != null) { + ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_GROUPBY, "TOK_GROUPBY"); + for (int i : BitSets.toIter(groupBy.getGroupSet())) { + RexInputRef iRef = new RexInputRef(i, groupBy.getCluster().getTypeFactory() + .createSqlType(SqlTypeName.ANY)); + b.add(iRef.accept(new RexVisitor(schema))); + } + + if (!groupBy.getGroupSet().isEmpty()) + hiveAST.groupBy = b.node(); + schema = new Schema(schema, groupBy); + } + + /* + * 5. Having + */ + if (having != null) { + ASTNode cond = having.getCondition().accept(new RexVisitor(schema)); + hiveAST.having = ASTBuilder.having(cond); + } + + /* + * 6. Project + */ + ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_SELECT, "TOK_SELECT"); + + if (select.getChildExps().isEmpty()) { + RexLiteral r = select.getCluster().getRexBuilder().makeExactLiteral(new BigDecimal(1)); + ASTNode selectExpr = ASTBuilder.selectExpr(ASTBuilder.literal(r), "1"); + b.add(selectExpr); + } else { + int i = 0; + + for (RexNode r : select.getChildExps()) { + ASTNode selectExpr = ASTBuilder.selectExpr(r.accept( + new RexVisitor(schema, r instanceof RexLiteral)), + select.getRowType().getFieldNames().get(i++)); + b.add(selectExpr); + } + } + hiveAST.select = b.node(); + + /* + * 7. Order Use in Order By from the block above. RelNode has no pointer to + * parent hence we need to go top down; but OB at each block really belong + * to its src/from. Hence the need to pass in sort for each block from + * its parent. + */ + convertOBToASTNode((HiveSort) order); + + // 8. Limit + convertLimitToASTNode((HiveSort) limit); + + return hiveAST.getAST(); + } + + private void convertLimitToASTNode(HiveSort limit) { + if (limit != null) { + HiveSort hiveLimit = (HiveSort) limit; + RexNode limitExpr = hiveLimit.getFetchExpr(); + if (limitExpr != null) { + Object val = ((RexLiteral) limitExpr).getValue2(); + hiveAST.limit = ASTBuilder.limit(val); + } + } + } + + private void convertOBToASTNode(HiveSort order) { + if (order != null) { + HiveSort hiveSort = (HiveSort) order; + if (!hiveSort.getCollation().getFieldCollations().isEmpty()) { + // 1 Add order by token + ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); + + schema = new Schema((HiveSort) hiveSort); + Map obRefToCallMap = hiveSort.getInputRefToCallMap(); + RexNode obExpr; + ASTNode astCol; + for (RelFieldCollation c : hiveSort.getCollation().getFieldCollations()) { + + // 2 Add Direction token + ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder + .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder + .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); + + // 3 Convert OB expr (OB Expr is usually an input ref except for top + // level OB; top level OB will have RexCall kept in a map.) + obExpr = null; + if (obRefToCallMap != null) + obExpr = obRefToCallMap.get(c.getFieldIndex()); + + if (obExpr != null) { + astCol = obExpr.accept(new RexVisitor(schema)); + } else { + ColumnInfo cI = schema.get(c.getFieldIndex()); + /* + * The RowResolver setup for Select drops Table associations. So + * setup ASTNode on unqualified name. 
+ */ + astCol = ASTBuilder.unqualifiedName(cI.column); + } + + // 4 buildup the ob expr AST + directionAST.addChild(astCol); + orderAst.addChild(directionAST); + } + hiveAST.order = orderAst; + } + } + } + + private Schema getRowSchema(String tblAlias) { + return new Schema(select, tblAlias); + } + + private QueryBlockInfo convertSource(RelNode r) { + Schema s; + ASTNode ast; + + if (r instanceof TableScan) { + TableScan f = (TableScan) r; + s = new Schema(f); + ast = ASTBuilder.table(f); + } else if (r instanceof Join) { + Join join = (Join) r; + QueryBlockInfo left = convertSource(join.getLeft()); + QueryBlockInfo right = convertSource(join.getRight()); + s = new Schema(left.schema, right.schema); + ASTNode cond = join.getCondition().accept(new RexVisitor(s)); + boolean semiJoin = join instanceof SemiJoin; + ast = ASTBuilder.join(left.ast, right.ast, join.getJoinType(), cond, semiJoin); + if (semiJoin) + s = left.schema; + } else if (r instanceof Union) { + RelNode leftInput = ((Union) r).getInput(0); + RelNode rightInput = ((Union) r).getInput(1); + + ASTConverter leftConv = new ASTConverter(leftInput, this.derivedTableCount); + ASTConverter rightConv = new ASTConverter(rightInput, this.derivedTableCount); + ASTNode leftAST = leftConv.convert(); + ASTNode rightAST = rightConv.convert(); + + ASTNode unionAST = getUnionAllAST(leftAST, rightAST); + + String sqAlias = nextAlias(); + ast = ASTBuilder.subQuery(unionAST, sqAlias); + s = new Schema((Union) r, sqAlias); + } else { + ASTConverter src = new ASTConverter(r, this.derivedTableCount); + ASTNode srcAST = src.convert(); + String sqAlias = nextAlias(); + s = src.getRowSchema(sqAlias); + ast = ASTBuilder.subQuery(srcAST, sqAlias); + } + return new QueryBlockInfo(s, ast); + } + + class QBVisitor extends RelVisitor { + + public void handle(Filter filter) { + RelNode child = filter.getInput(); + if (child instanceof Aggregate && !((Aggregate) child).getGroupSet().isEmpty()) { + ASTConverter.this.having = filter; + } else { + ASTConverter.this.where = filter; + } + } + + public void handle(Project project) { + if (ASTConverter.this.select == null) { + ASTConverter.this.select = project; + } else { + ASTConverter.this.from = project; + } + } + + @Override + public void visit(RelNode node, int ordinal, RelNode parent) { + + if (node instanceof TableScan) { + ASTConverter.this.from = node; + } else if (node instanceof Filter) { + handle((Filter) node); + } else if (node instanceof Project) { + handle((Project) node); + } else if (node instanceof Join) { + ASTConverter.this.from = node; + } else if (node instanceof Union) { + ASTConverter.this.from = node; + } else if (node instanceof Aggregate) { + ASTConverter.this.groupBy = (Aggregate) node; + } else if (node instanceof Sort) { + if (ASTConverter.this.select != null) { + ASTConverter.this.from = node; + } else { + Sort hiveSortRel = (Sort) node; + if (hiveSortRel.getCollation().getFieldCollations().isEmpty()) + ASTConverter.this.limit = hiveSortRel; + else + ASTConverter.this.order = hiveSortRel; + } + } + /* + * once the source node is reached; stop traversal for this QB + */ + if (ASTConverter.this.from == null) { + node.childrenAccept(this); + } + } + + } + + static class RexVisitor extends RexVisitorImpl { + + private final Schema schema; + private boolean useTypeQualInLiteral; + + protected RexVisitor(Schema schema) { + this(schema, false); + } + + protected RexVisitor(Schema schema, boolean useTypeQualInLiteral) { + super(true); + this.schema = schema; + this.useTypeQualInLiteral = 
useTypeQualInLiteral; + } + + @Override + public ASTNode visitFieldAccess(RexFieldAccess fieldAccess) { + return ASTBuilder.construct(HiveParser.DOT, ".").add(super.visitFieldAccess(fieldAccess)) + .add(HiveParser.Identifier, fieldAccess.getField().getName()).node(); + } + + @Override + public ASTNode visitInputRef(RexInputRef inputRef) { + ColumnInfo cI = schema.get(inputRef.getIndex()); + if (cI.agg != null) { + return (ASTNode) ParseDriver.adaptor.dupTree(cI.agg); + } + + if (cI.table == null || cI.table.isEmpty()) + return ASTBuilder.unqualifiedName(cI.column); + else + return ASTBuilder.qualifiedName(cI.table, cI.column); + + } + + @Override + public ASTNode visitLiteral(RexLiteral literal) { + return ASTBuilder.literal(literal, useTypeQualInLiteral); + } + + private ASTNode getPSpecAST(RexWindow window) { + ASTNode pSpecAst = null; + + ASTNode dByAst = null; + if (window.partitionKeys != null && !window.partitionKeys.isEmpty()) { + dByAst = ASTBuilder.createAST(HiveParser.TOK_DISTRIBUTEBY, "TOK_DISTRIBUTEBY"); + for (RexNode pk : window.partitionKeys) { + ASTNode astCol = pk.accept(this); + dByAst.addChild(astCol); + } + } + + ASTNode oByAst = null; + if (window.orderKeys != null && !window.orderKeys.isEmpty()) { + oByAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); + for (RexFieldCollation ok : window.orderKeys) { + ASTNode astNode = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder + .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder + .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); + ASTNode astCol = ok.left.accept(this); + astNode.addChild(astCol); + oByAst.addChild(astNode); + } + } + + if (dByAst != null || oByAst != null) { + pSpecAst = ASTBuilder.createAST(HiveParser.TOK_PARTITIONINGSPEC, "TOK_PARTITIONINGSPEC"); + if (dByAst != null) + pSpecAst.addChild(dByAst); + if (oByAst != null) + pSpecAst.addChild(oByAst); + } + + return pSpecAst; + } + + private ASTNode getWindowBound(RexWindowBound wb) { + ASTNode wbAST = null; + + if (wb.isCurrentRow()) { + wbAST = ASTBuilder.createAST(HiveParser.KW_CURRENT, "CURRENT"); + } else { + if (wb.isPreceding()) + wbAST = ASTBuilder.createAST(HiveParser.KW_PRECEDING, "PRECEDING"); + else + wbAST = ASTBuilder.createAST(HiveParser.KW_FOLLOWING, "FOLLOWING"); + if (wb.isUnbounded()) { + wbAST.addChild(ASTBuilder.createAST(HiveParser.KW_UNBOUNDED, "UNBOUNDED")); + } else { + ASTNode offset = wb.getOffset().accept(this); + wbAST.addChild(offset); + } + } + + return wbAST; + } + + private ASTNode getWindowRangeAST(RexWindow window) { + ASTNode wRangeAst = null; + + ASTNode startAST = null; + RexWindowBound ub = window.getUpperBound(); + if (ub != null) { + startAST = getWindowBound(ub); + } + + ASTNode endAST = null; + RexWindowBound lb = window.getLowerBound(); + if (lb != null) { + endAST = getWindowBound(lb); + } + + if (startAST != null || endAST != null) { + // NOTE: in Hive AST Rows->Range(Physical) & Range -> Values (logical) + if (window.isRows()) + wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWRANGE, "TOK_WINDOWRANGE"); + else + wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWVALUES, "TOK_WINDOWVALUES"); + if (startAST != null) + wRangeAst.addChild(startAST); + if (endAST != null) + wRangeAst.addChild(endAST); + } + + return wRangeAst; + } + + @Override + public ASTNode visitOver(RexOver over) { + if (!deep) { + return null; + } + + // 1. Translate the UDAF + final ASTNode wUDAFAst = visitCall(over); + + // 2. 
Add TOK_WINDOW as child of UDAF + ASTNode wSpec = ASTBuilder.createAST(HiveParser.TOK_WINDOWSPEC, "TOK_WINDOWSPEC"); + wUDAFAst.addChild(wSpec); + + // 3. Add Part Spec & Range Spec as child of TOK_WINDOW + final RexWindow window = over.getWindow(); + final ASTNode wPSpecAst = getPSpecAST(window); + final ASTNode wRangeAst = getWindowRangeAST(window); + if (wPSpecAst != null) + wSpec.addChild(wPSpecAst); + if (wRangeAst != null) + wSpec.addChild(wRangeAst); + + return wUDAFAst; + } + + @Override + public ASTNode visitCall(RexCall call) { + if (!deep) { + return null; + } + + SqlOperator op = call.getOperator(); + List astNodeLst = new LinkedList(); + if (op.kind == SqlKind.CAST) { + HiveToken ht = TypeConverter.hiveToken(call.getType()); + ASTBuilder astBldr = ASTBuilder.construct(ht.type, ht.text); + if (ht.args != null) { + for (String castArg : ht.args) + astBldr.add(HiveParser.Identifier, castArg); + } + astNodeLst.add(astBldr.node()); + } + + for (RexNode operand : call.operands) { + astNodeLst.add(operand.accept(this)); + } + + if (isFlat(call)) + return SqlFunctionConverter.buildAST(op, astNodeLst, 0); + else + return SqlFunctionConverter.buildAST(op, astNodeLst); + } + } + + static class QueryBlockInfo { + Schema schema; + ASTNode ast; + + public QueryBlockInfo(Schema schema, ASTNode ast) { + super(); + this.schema = schema; + this.ast = ast; + } + } + + /* + * represents the schema exposed by a QueryBlock. + */ + static class Schema extends ArrayList { + + private static final long serialVersionUID = 1L; + + Schema(TableScan scan) { + String tabName = ((RelOptHiveTable) scan.getTable()).getTableAlias(); + for (RelDataTypeField field : scan.getRowType().getFieldList()) { + add(new ColumnInfo(tabName, field.getName())); + } + } + + Schema(Project select, String alias) { + for (RelDataTypeField field : select.getRowType().getFieldList()) { + add(new ColumnInfo(alias, field.getName())); + } + } + + Schema(Union unionRel, String alias) { + for (RelDataTypeField field : unionRel.getRowType().getFieldList()) { + add(new ColumnInfo(alias, field.getName())); + } + } + + Schema(Schema left, Schema right) { + for (ColumnInfo cI : Iterables.concat(left, right)) { + add(cI); + } + } + + Schema(Schema src, Aggregate gBy) { + for (int i : BitSets.toIter(gBy.getGroupSet())) { + ColumnInfo cI = src.get(i); + add(cI); + } + List aggs = gBy.getAggCallList(); + for (AggregateCall agg : aggs) { + int argCount = agg.getArgList().size(); + ASTBuilder b = agg.isDistinct() ? ASTBuilder.construct(HiveParser.TOK_FUNCTIONDI, + "TOK_FUNCTIONDI") : argCount == 0 ? ASTBuilder.construct(HiveParser.TOK_FUNCTIONSTAR, + "TOK_FUNCTIONSTAR") : ASTBuilder.construct(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); + b.add(HiveParser.Identifier, agg.getAggregation().getName()); + for (int i : agg.getArgList()) { + RexInputRef iRef = new RexInputRef(i, gBy.getCluster().getTypeFactory() + .createSqlType(SqlTypeName.ANY)); + b.add(iRef.accept(new RexVisitor(src))); + } + add(new ColumnInfo(null, b.node())); + } + } + + /** + * Assumption:
+ * 1. Project will always be child of Sort.
+ * 2. In Calcite every projection in Project is uniquely named + * (unambiguous) without using a table qualifier (table name).
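+ * + * Illustrative example (editor's note, hypothetical plan): for a HiveSort + * over Project(deptno, total), the schema built below is [deptno, total] + * with null table aliases, so ORDER BY expressions resolve to unqualified + * column names.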
+ * + * @param order + * Hive Sort Node + * @return Schema + */ + public Schema(HiveSort order) { + Project select = (Project) order.getInput(); + for (String projName : select.getRowType().getFieldNames()) { + add(new ColumnInfo(null, projName)); + } + } + } + + /* + * represents Column information exposed by a QueryBlock. + */ + static class ColumnInfo { + String table; + String column; + ASTNode agg; + + ColumnInfo(String table, String column) { + super(); + this.table = table; + this.column = column; + } + + ColumnInfo(String table, ASTNode agg) { + super(); + this.table = table; + this.agg = agg; + } + + ColumnInfo(String alias, ColumnInfo srcCol) { + this.table = alias; + this.column = srcCol.column; + this.agg = srcCol.agg; + } + } + + private String nextAlias() { + String tabAlias = String.format("$hdt$_%d", derivedTableCount); + derivedTableCount++; + return tabAlias; + } + + static class HiveAST { + + ASTNode from; + ASTNode where; + ASTNode groupBy; + ASTNode having; + ASTNode select; + ASTNode order; + ASTNode limit; + + public ASTNode getAST() { + ASTBuilder b = ASTBuilder + .construct(HiveParser.TOK_QUERY, "TOK_QUERY") + .add(from) + .add( + ASTBuilder.construct(HiveParser.TOK_INSERT, "TOK_INSERT").add(ASTBuilder.destNode()) + .add(select).add(where).add(groupBy).add(having).add(order).add(limit)); + return b.node(); + } + } + + public ASTNode getUnionAllAST(ASTNode leftAST, ASTNode rightAST) { + + ASTNode unionTokAST = ASTBuilder.construct(HiveParser.TOK_UNION, "TOK_UNION").add(leftAST) + .add(rightAST).node(); + + return unionTokAST; + } + + public static boolean isFlat(RexCall call) { + boolean flat = false; + if (call.operands != null && call.operands.size() > 2) { + SqlOperator op = call.getOperator(); + if (op.getKind() == SqlKind.AND || op.getKind() == SqlKind.OR) { + flat = true; + } + } + + return flat; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java new file mode 100644 index 0000000..cce65f1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -0,0 +1,168 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.LinkedList; +import java.util.List; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexVisitorImpl; + +/* + * convert a RexNode to an ExprNodeDesc + */ +public class ExprNodeConverter extends RexVisitorImpl { + + RelDataType rType; + String tabAlias; + boolean partitioningExpr; + + public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr) { + super(true); + /* + * hb: 6/25/14 for now we only support expressions that only contain + * partition cols. there is no use case for supporting generic expressions. + * for supporting generic exprs., we need to give the converter information + * on whether a column is a partition column or not, whether a column is a + * virtual column or not. + */ + assert partitioningExpr == true; + this.tabAlias = tabAlias; + this.rType = rType; + this.partitioningExpr = partitioningExpr; + } + + @Override + public ExprNodeDesc visitInputRef(RexInputRef inputRef) { + RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex()); + return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), f.getName(), tabAlias, + partitioningExpr); + } + + @Override + public ExprNodeDesc visitCall(RexCall call) { + ExprNodeGenericFuncDesc gfDesc = null; + + if (!deep) { + return null; + } + + List args = new LinkedList(); + + for (RexNode operand : call.operands) { + args.add(operand.accept(this)); + } + + // If Expr is flat (and[p,q,r,s] or[p,q,r,s]) then recursively build the + // exprnode + if (ASTConverter.isFlat(call)) { + ArrayList tmpExprArgs = new ArrayList(); + tmpExprArgs.addAll(args.subList(0, 2)); + gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), + SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2), tmpExprArgs); + for (int i = 2; i < call.operands.size(); i++) { + tmpExprArgs = new ArrayList(); + tmpExprArgs.add(gfDesc); + tmpExprArgs.add(args.get(i)); + gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), + SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2), tmpExprArgs); + } + } else { + GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF( + call.getOperator(), call.getType(), args.size()); + if (hiveUdf == null) { + throw new RuntimeException("Cannot find UDF for " + call.getType() + " " + call.getOperator() + + "[" + call.getOperator().getKind() + "]/" + args.size()); + } + gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), hiveUdf, args); + } + + return gfDesc; + } + + @Override + public ExprNodeDesc visitLiteral(RexLiteral literal) { + RelDataType lType = literal.getType(); + + switch 
(literal.getType().getSqlTypeName()) { + case BOOLEAN: + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral + .booleanValue(literal))); + case TINYINT: + return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal + .getValue3()).byteValue())); + case SMALLINT: + return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, + Short.valueOf(((Number) literal.getValue3()).shortValue())); + case INTEGER: + return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, + Integer.valueOf(((Number) literal.getValue3()).intValue())); + case BIGINT: + return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal + .getValue3()).longValue())); + case FLOAT: + return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, + Float.valueOf(((Number) literal.getValue3()).floatValue())); + case DOUBLE: + return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, + Double.valueOf(((Number) literal.getValue3()).doubleValue())); + case DATE: + return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, + new Date(((Calendar)literal.getValue()).getTimeInMillis())); + case TIMESTAMP: { + Object value = literal.getValue3(); + if (value instanceof Long) { + value = new Timestamp((Long)value); + } + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, value); + } + case BINARY: + return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3()); + case DECIMAL: + return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), + lType.getScale()), literal.getValue3()); + case VARCHAR: + return new ExprNodeConstantDesc(TypeInfoFactory.getVarcharTypeInfo(lType.getPrecision()), + new HiveVarchar((String) literal.getValue3(), lType.getPrecision())); + case CHAR: + return new ExprNodeConstantDesc(TypeInfoFactory.getCharTypeInfo(lType.getPrecision()), + new HiveChar((String) literal.getValue3(), lType.getPrecision())); + case OTHER: + default: + return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); + } + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinCondTypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinCondTypeCheckProcFactory.java new file mode 100644 index 0000000..4cd01c9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinCondTypeCheckProcFactory.java @@ -0,0 +1,316 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.TypeCheckCtx; +import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; + +/** + * JoinCondTypeCheckProcFactory is used by Calcite planner(CBO) to generate Join Conditions from Join Condition AST. + * Reasons for sub class: + * 1. Additional restrictions on what is supported in Join Conditions + * 2. Column handling is different + * 3. Join Condn expr has two input RR as opposed to one. + */ + +/** + * TODO:
+ * 1. Could we use a combined RR instead of a list of RRs?
+ * 2. Use Column Processing from TypeCheckProcFactory
+ * 3. Why not use GB expr ? + */ +public class JoinCondTypeCheckProcFactory extends TypeCheckProcFactory { + + public static Map genExprNode(ASTNode expr, TypeCheckCtx tcCtx) + throws SemanticException { + return TypeCheckProcFactory.genExprNode(expr, tcCtx, new JoinCondTypeCheckProcFactory()); + } + + /** + * Processor for table columns. + */ + public static class JoinCondColumnExprProcessor extends ColumnExprProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ASTNode expr = (ASTNode) nd; + ASTNode parent = stack.size() > 1 ? (ASTNode) stack.get(stack.size() - 2) : null; + + if (expr.getType() != HiveParser.TOK_TABLE_OR_COL) { + ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr), expr); + return null; + } + + assert (expr.getChildCount() == 1); + String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()); + + boolean qualifiedAccess = (parent != null && parent.getType() == HiveParser.DOT); + + ColumnInfo colInfo = null; + if (!qualifiedAccess) { + colInfo = getColInfo(ctx, null, tableOrCol, expr); + // It's a column. + return new ExprNodeColumnDesc(colInfo); + } else if (hasTableAlias(ctx, tableOrCol, expr)) { + return null; + } else { + // Qualified column access for which table was not found + throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(expr)); + } + } + + private static boolean hasTableAlias(JoinTypeCheckCtx ctx, String tabName, ASTNode expr) + throws SemanticException { + int tblAliasCnt = 0; + for (RowResolver rr : ctx.getInputRRList()) { + if (rr.hasTableAlias(tabName)) + tblAliasCnt++; + } + + if (tblAliasCnt > 1) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); + } + + return (tblAliasCnt == 1) ? true : false; + } + + private static ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, String colAlias, + ASTNode expr) throws SemanticException { + ColumnInfo tmp; + ColumnInfo cInfoToRet = null; + + for (RowResolver rr : ctx.getInputRRList()) { + tmp = rr.get(tabName, colAlias); + if (tmp != null) { + if (cInfoToRet != null) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); + } + cInfoToRet = tmp; + } + } + + return cInfoToRet; + } + } + + /** + * Factory method to get ColumnExprProcessor. + * + * @return ColumnExprProcessor. + */ + @Override + public ColumnExprProcessor getColumnExprProcessor() { + return new JoinCondColumnExprProcessor(); + } + + /** + * The default processor for typechecking. + */ + public static class JoinCondDefaultExprProcessor extends DefaultExprProcessor { + @Override + protected List getReferenceableColumnAliases(TypeCheckCtx ctx) { + JoinTypeCheckCtx jCtx = (JoinTypeCheckCtx) ctx; + List possibleColumnNames = new ArrayList(); + for (RowResolver rr : jCtx.getInputRRList()) { + possibleColumnNames.addAll(rr.getReferenceableColumnAliases(null, -1)); + } + + return possibleColumnNames; + } + + @Override + protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, + Object... nodeOutputs) throws SemanticException { + String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0) + .getText()); + // NOTE: tableAlias must be a valid non-ambiguous table alias, + // because we've checked that in TOK_TABLE_OR_COL's process method. 
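+ // nodeOutputs[1] is the column name, already evaluated to a constant by the
+ // child processor; resolve it against whichever input RowResolver knows the
+ // table alias.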
+ ColumnInfo colInfo = getColInfo((JoinTypeCheckCtx) ctx, tableAlias,
+ ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString(), expr);
+
+ if (colInfo == null) {
+ ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
+ return null;
+ }
+ return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), tableAlias,
+ colInfo.getIsVirtualCol());
+ }
+
+ private static ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, String colAlias,
+ ASTNode expr) throws SemanticException {
+ ColumnInfo tmp;
+ ColumnInfo cInfoToRet = null;
+
+ for (RowResolver rr : ctx.getInputRRList()) {
+ tmp = rr.get(tabName, colAlias);
+ if (tmp != null) {
+ if (cInfoToRet != null) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
+ }
+ cInfoToRet = tmp;
+ }
+ }
+
+ return cInfoToRet;
+ }
+
+ @Override
+ protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi,
+ List<ExprNodeDesc> children, GenericUDF genericUDF) throws SemanticException {
+ super.validateUDF(expr, isFunction, ctx, fi, children, genericUDF);
+
+ JoinTypeCheckCtx jCtx = (JoinTypeCheckCtx) ctx;
+
+ // Join conditions cannot contain disjunctions.
+ if (genericUDF instanceof GenericUDFOPOr) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3.getMsg(expr));
+ }
+
+ // Non-conjunctive elements have further limitations in join conditions.
+ if (!(genericUDF instanceof GenericUDFOPAnd)) {
+ // A non-comparison UDF other than 'and' cannot use inputs from both sides.
+ if (!(genericUDF instanceof GenericUDFBaseCompare)) {
+ if (genericUDFargsRefersToBothInput(genericUDF, children, jCtx.getInputRRList())) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
+ }
+ } else if (genericUDF instanceof GenericUDFBaseCompare) {
+ // A comparison whose LHS and RHS are both non-literals cannot refer to
+ // inputs from both sides.
+ if (children.size() == 2 && !(children.get(0) instanceof ExprNodeConstantDesc)
+ && !(children.get(1) instanceof ExprNodeConstantDesc)) {
+ if (comparisonUDFargsRefersToBothInput((GenericUDFBaseCompare) genericUDF, children,
+ jCtx.getInputRRList())) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
+ }
+ }
+ }
+ }
+ }
+
+ private static boolean genericUDFargsRefersToBothInput(GenericUDF udf,
+ List<ExprNodeDesc> children, List<RowResolver> inputRRList) {
+ boolean argsRefersToBothInput = false;
+
+ Map<Integer, ExprNodeDesc> hashCodeToColDescMap = new HashMap<Integer, ExprNodeDesc>();
+ for (ExprNodeDesc child : children) {
+ ExprNodeDescUtils.getExprNodeColumnDesc(child, hashCodeToColDescMap);
+ }
+ Set<Integer> inputRef = getInputRef(hashCodeToColDescMap.values(), inputRRList);
+
+ if (inputRef.size() > 1)
+ argsRefersToBothInput = true;
+
+ return argsRefersToBothInput;
+ }
+
+ private static boolean comparisonUDFargsRefersToBothInput(GenericUDFBaseCompare comparisonUDF,
+ List<ExprNodeDesc> children, List<RowResolver> inputRRList) {
+ boolean argsRefersToBothInput = false;
+
+ Map<Integer, ExprNodeDesc> lhsHashCodeToColDescMap = new HashMap<Integer, ExprNodeDesc>();
+ Map<Integer, ExprNodeDesc> rhsHashCodeToColDescMap = new HashMap<Integer, ExprNodeDesc>();
+ ExprNodeDescUtils.getExprNodeColumnDesc(children.get(0), lhsHashCodeToColDescMap);
+ ExprNodeDescUtils.getExprNodeColumnDesc(children.get(1), rhsHashCodeToColDescMap);
+ Set<Integer> lhsInputRef = getInputRef(lhsHashCodeToColDescMap.values(), inputRRList);
+ Set<Integer> rhsInputRef = getInputRef(rhsHashCodeToColDescMap.values(), inputRRList);
+
+ if (lhsInputRef.size() > 1 || rhsInputRef.size() > 1)
+ argsRefersToBothInput = true;
+
+ return argsRefersToBothInput;
+ }
+
+ private static Set<Integer> getInputRef(Collection<ExprNodeDesc> colDescSet,
+ List<RowResolver> inputRRList) {
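+ // For each column referenced by the expression, record the index (0 = left,
+ // 1 = right) of the input RowResolver that resolves it; a result with more
+ // than one index means the expression spans both join inputs.
+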
String tableAlias; + RowResolver inputRR; + Set inputLineage = new HashSet(); + + for (ExprNodeDesc col : colDescSet) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) col; + tableAlias = colDesc.getTabAlias(); + + for (int i = 0; i < inputRRList.size(); i++) { + inputRR = inputRRList.get(i); + + // If table Alias is present check if InputRR has that table and then + // check for internal name + // else if table alias is null then check with internal name in all + // inputRR. + if (tableAlias != null) { + if (inputRR.hasTableAlias(tableAlias)) { + if (inputRR.getInvRslvMap().containsKey(colDesc.getColumn())) { + inputLineage.add(i); + } + } + } else { + if (inputRR.getInvRslvMap().containsKey(colDesc.getColumn())) { + inputLineage.add(i); + } + } + } + } + + return inputLineage; + } + } + + /** + * Factory method to get DefaultExprProcessor. + * + * @return DefaultExprProcessor. + */ + @Override + public DefaultExprProcessor getDefaultExprProcessor() { + return new JoinCondDefaultExprProcessor(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java new file mode 100644 index 0000000..bbd4723 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.util.List; + +import org.apache.hadoop.hive.ql.parse.JoinType; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.TypeCheckCtx; + +import com.google.common.collect.ImmutableList; + +/** + * JoinTypeCheckCtx is used by Calcite planner(CBO) to generate Join Conditions from Join Condition AST. + * Reasons for sub class: + * 1. Join Conditions can not handle: + * a. Stateful Functions + * b. Distinct + * c. '*' expr + * d. '.*' expr + * e. Windowing expr + * f. Complex type member access + * g. Array Index Access + * h. Sub query + * i. GB expr elimination + * 2. Join Condn expr has two input RR as opposed to one. + */ + +/** + * TODO:
+ * 1. Could we use a combined RR instead of a list of RRs?
+ * 2. Why not use GB expr ? + */ +public class JoinTypeCheckCtx extends TypeCheckCtx { + private final ImmutableList inputRRLst; + private final boolean outerJoin; + + public JoinTypeCheckCtx(RowResolver leftRR, RowResolver rightRR, JoinType hiveJoinType) + throws SemanticException { + super(RowResolver.getCombinedRR(leftRR, rightRR), false, false, false, false, false, false, + false, false); + this.inputRRLst = ImmutableList.of(leftRR, rightRR); + this.outerJoin = (hiveJoinType == JoinType.LEFTOUTER) || (hiveJoinType == JoinType.RIGHTOUTER) + || (hiveJoinType == JoinType.FULLOUTER); + } + + /** + * @return the inputRR List + */ + public List getInputRRList() { + return inputRRLst; + } + + public boolean isOuterJoin() { + return outerJoin; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java new file mode 100644 index 0000000..10eefac --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java @@ -0,0 +1,392 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelCollationImpl; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SetOp; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.rules.MultiJoin; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.util.Pair; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +public class PlanModifierForASTConv { + private static final Log LOG = LogFactory.getLog(PlanModifierForASTConv.class); + + public static RelNode convertOpTree(RelNode rel, List resultSchema) + throws CalciteSemanticException { + RelNode newTopNode = rel; + if (LOG.isDebugEnabled()) { + LOG.debug("Original plan for PlanModifier\n " + RelOptUtil.toString(newTopNode)); + } + + if (!(newTopNode instanceof Project) && !(newTopNode instanceof Sort)) { + newTopNode = introduceDerivedTable(newTopNode); + if (LOG.isDebugEnabled()) { + LOG.debug("Plan after top-level introduceDerivedTable\n " + + RelOptUtil.toString(newTopNode)); + } + } + + convertOpTree(newTopNode, (RelNode) null); + if (LOG.isDebugEnabled()) { + LOG.debug("Plan after nested convertOpTree\n " + RelOptUtil.toString(newTopNode)); + } + + Pair topSelparentPair = HiveCalciteUtil.getTopLevelSelect(newTopNode); + fixTopOBSchema(newTopNode, topSelparentPair, resultSchema); + if (LOG.isDebugEnabled()) { + LOG.debug("Plan after fixTopOBSchema\n " + RelOptUtil.toString(newTopNode)); + } + + topSelparentPair = HiveCalciteUtil.getTopLevelSelect(newTopNode); + newTopNode = renameTopLevelSelectInResultSchema(newTopNode, topSelparentPair, resultSchema); + if (LOG.isDebugEnabled()) { + LOG.debug("Final plan after modifier\n " + RelOptUtil.toString(newTopNode)); + } + return newTopNode; + } + + private static void convertOpTree(RelNode rel, RelNode parent) { + + if (rel instanceof HepRelVertex) { + throw new RuntimeException("Found HepRelVertex"); + } else if (rel instanceof Join) { + if (!validJoinParent(rel, parent)) { + introduceDerivedTable(rel, parent); + } + } else if (rel instanceof MultiJoin) { + throw new RuntimeException("Found MultiJoin"); + } else if (rel instanceof RelSubset) { + throw new 
RuntimeException("Found RelSubset"); + } else if (rel instanceof SetOp) { + // TODO: Handle more than 2 inputs for setop + if (!validSetopParent(rel, parent)) + introduceDerivedTable(rel, parent); + + SetOp setop = (SetOp) rel; + for (RelNode inputRel : setop.getInputs()) { + if (!validSetopChild(inputRel)) { + introduceDerivedTable(inputRel, setop); + } + } + } else if (rel instanceof SingleRel) { + if (rel instanceof Filter) { + if (!validFilterParent(rel, parent)) { + introduceDerivedTable(rel, parent); + } + } else if (rel instanceof HiveSort) { + if (!validSortParent(rel, parent)) { + introduceDerivedTable(rel, parent); + } + if (!validSortChild((HiveSort) rel)) { + introduceDerivedTable(((HiveSort) rel).getInput(), rel); + } + } else if (rel instanceof HiveAggregate) { + RelNode newParent = parent; + if (!validGBParent(rel, parent)) { + newParent = introduceDerivedTable(rel, parent); + } + // check if groupby is empty and there is no other cols in aggr + // this should only happen when newParent is constant. + if (isEmptyGrpAggr(rel)) { + replaceEmptyGroupAggr(rel, newParent); + } + } + } + + List childNodes = rel.getInputs(); + if (childNodes != null) { + for (RelNode r : childNodes) { + convertOpTree(r, rel); + } + } + } + + private static void fixTopOBSchema(final RelNode rootRel, + Pair topSelparentPair, List resultSchema) + throws CalciteSemanticException { + if (!(topSelparentPair.getKey() instanceof Sort) + || !HiveCalciteUtil.orderRelNode(topSelparentPair.getKey())) { + return; + } + HiveSort obRel = (HiveSort) topSelparentPair.getKey(); + Project obChild = (Project) topSelparentPair.getValue(); + if (obChild.getRowType().getFieldCount() <= resultSchema.size()) { + return; + } + + RelDataType rt = obChild.getRowType(); + @SuppressWarnings({ "unchecked", "rawtypes" }) + Set collationInputRefs = new HashSet( + RelCollationImpl.ordinals(obRel.getCollation())); + ImmutableMap.Builder inputRefToCallMapBldr = ImmutableMap.builder(); + for (int i = resultSchema.size(); i < rt.getFieldCount(); i++) { + if (collationInputRefs.contains(i)) { + inputRefToCallMapBldr.put(i, obChild.getChildExps().get(i)); + } + } + ImmutableMap inputRefToCallMap = inputRefToCallMapBldr.build(); + + if ((obChild.getRowType().getFieldCount() - inputRefToCallMap.size()) != resultSchema.size()) { + LOG.error(generateInvalidSchemaMessage(obChild, resultSchema, inputRefToCallMap.size())); + throw new CalciteSemanticException("Result Schema didn't match Optimized Op Tree Schema"); + } + // This removes order-by only expressions from the projections. 
+ HiveProject replacementProjectRel = HiveProject.create(obChild.getInput(), obChild
+ .getChildExps().subList(0, resultSchema.size()), obChild.getRowType().getFieldNames()
+ .subList(0, resultSchema.size()));
+ obRel.replaceInput(0, replacementProjectRel);
+ obRel.setInputRefToCallMap(inputRefToCallMap);
+ }
+
+ private static String generateInvalidSchemaMessage(Project topLevelProj,
+ List<FieldSchema> resultSchema, int fieldsForOB) {
+ String errorDesc = "Result Schema didn't match Calcite Optimized Op Tree; schema: ";
+ for (FieldSchema fs : resultSchema) {
+ errorDesc += "[" + fs.getName() + ":" + fs.getType() + "], ";
+ }
+ errorDesc += " projection fields: ";
+ for (RexNode exp : topLevelProj.getChildExps()) {
+ errorDesc += "[" + exp.toString() + ":" + exp.getType() + "], ";
+ }
+ if (fieldsForOB != 0) {
+ errorDesc += fieldsForOB + " fields removed due to ORDER BY ";
+ }
+ return errorDesc.substring(0, errorDesc.length() - 2);
+ }
+
+ private static RelNode renameTopLevelSelectInResultSchema(final RelNode rootRel,
+ Pair<RelNode, RelNode> topSelparentPair, List<FieldSchema> resultSchema)
+ throws CalciteSemanticException {
+ RelNode parentOforiginalProjRel = topSelparentPair.getKey();
+ HiveProject originalProjRel = (HiveProject) topSelparentPair.getValue();
+
+ // Assumption: top portion of tree could only be
+ // (limit)?(OB)?(Project)....
+ List<RexNode> rootChildExps = originalProjRel.getChildExps();
+ if (resultSchema.size() != rootChildExps.size()) {
+ // Safeguard against potential issues in CBO RowResolver construction. Disable CBO for now.
+ LOG.error(generateInvalidSchemaMessage(originalProjRel, resultSchema, 0));
+ throw new CalciteSemanticException("Result Schema didn't match Optimized Op Tree Schema");
+ }
+
+ List<String> newSelAliases = new ArrayList<String>();
+ String colAlias;
+ for (int i = 0; i < rootChildExps.size(); i++) {
+ colAlias = resultSchema.get(i).getName();
+ if (colAlias.startsWith("_")) {
+ colAlias = colAlias.substring(1);
+ }
+ newSelAliases.add(colAlias);
+ }
+
+ HiveProject replacementProjectRel = HiveProject.create(originalProjRel.getInput(),
+ originalProjRel.getChildExps(), newSelAliases);
+
+ if (rootRel == originalProjRel) {
+ return replacementProjectRel;
+ } else {
+ parentOforiginalProjRel.replaceInput(0, replacementProjectRel);
+ return rootRel;
+ }
+ }
+
+ private static RelNode introduceDerivedTable(final RelNode rel) {
+ List<RexNode> projectList = HiveCalciteUtil.getProjsFromBelowAsInputRef(rel);
+
+ HiveProject select = HiveProject.create(rel.getCluster(), rel, projectList,
+ rel.getRowType(), rel.getCollationList());
+
+ return select;
+ }
+
+ private static RelNode introduceDerivedTable(final RelNode rel, RelNode parent) {
+ int i = 0;
+ int pos = -1;
+ List<RelNode> childList = parent.getInputs();
+
+ for (RelNode child : childList) {
+ if (child == rel) {
+ pos = i;
+ break;
+ }
+ i++;
+ }
+
+ if (pos == -1) {
+ throw new RuntimeException("Couldn't find child node in parent's inputs");
+ }
+
+ RelNode select = introduceDerivedTable(rel);
+
+ parent.replaceInput(pos, select);
+
+ return select;
+ }
+
+ private static boolean validJoinParent(RelNode joinNode, RelNode parent) {
+ boolean validParent = true;
+
+ if (parent instanceof Join) {
+ if (((Join) parent).getRight() == joinNode) {
+ validParent = false;
+ }
+ } else if (parent instanceof SetOp) {
+ validParent = false;
+ }
+
+ return validParent;
+ }
+
+ private static boolean validFilterParent(RelNode filterNode, RelNode parent) {
+ boolean validParent = true;
+
+ // TODO: Verify the GB HAVING is not a separate filter (if so we shouldn't
+ // introduce a derived table)
+ if (parent instanceof Filter || parent instanceof Join
+ || parent instanceof SetOp) {
+ validParent = false;
+ }
+
+ return validParent;
+ }
+
+ private static boolean validGBParent(RelNode gbNode, RelNode parent) {
+ boolean validParent = true;
+
+ // TODO: Verify the GB HAVING is not a separate filter (if so we shouldn't
+ // introduce a derived table)
+ if (parent instanceof Join || parent instanceof SetOp
+ || parent instanceof Aggregate
+ || (parent instanceof Filter && ((Aggregate) gbNode).getGroupSet().isEmpty())) {
+ validParent = false;
+ }
+
+ return validParent;
+ }
+
+ private static boolean validSortParent(RelNode sortNode, RelNode parent) {
+ boolean validParent = true;
+
+ if (parent != null && !(parent instanceof Project)
+ && !((parent instanceof Sort) || HiveCalciteUtil.orderRelNode(parent)))
+ validParent = false;
+
+ return validParent;
+ }
+
+ private static boolean validSortChild(HiveSort sortNode) {
+ boolean validChild = true;
+ RelNode child = sortNode.getInput();
+
+ if (!(HiveCalciteUtil.limitRelNode(sortNode) && HiveCalciteUtil.orderRelNode(child))
+ && !(child instanceof Project)) {
+ validChild = false;
+ }
+
+ return validChild;
+ }
+
+ private static boolean validSetopParent(RelNode setop, RelNode parent) {
+ boolean validChild = true;
+
+ if (parent != null && !(parent instanceof Project)) {
+ validChild = false;
+ }
+
+ return validChild;
+ }
+
+ private static boolean validSetopChild(RelNode setopChild) {
+ boolean validChild = true;
+
+ if (!(setopChild instanceof Project)) {
+ validChild = false;
+ }
+
+ return validChild;
+ }
+
+ private static boolean isEmptyGrpAggr(RelNode gbNode) {
+ // Verify that both the group set and the aggregate call list are empty.
+ Aggregate aggrnode = (Aggregate) gbNode;
+ if (aggrnode.getGroupSet().isEmpty() && aggrnode.getAggCallList().isEmpty()) {
+ return true;
+ }
+ return false;
+ }
+
+ private static void replaceEmptyGroupAggr(final RelNode rel, RelNode parent) {
+ // If this function is called, the parent should only contain constants.
+ List<RexNode> exps = parent.getChildExps();
+ for (RexNode rexNode : exps) {
+ if (rexNode.getKind() != SqlKind.LITERAL) {
+ throw new RuntimeException("We expect " + parent.toString()
+ + " to contain only constants. However, " + rexNode.toString() + " is "
+ + rexNode.getKind());
+ }
+ }
+ HiveAggregate oldAggRel = (HiveAggregate) rel;
+ RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
+ RelDataType longType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, typeFactory);
+ RelDataType intType = TypeConverter.convert(TypeInfoFactory.intTypeInfo, typeFactory);
+ // Create the dummy aggregation.
+ SqlAggFunction countFn = (SqlAggFunction) SqlFunctionConverter.getCalciteAggFn("count",
+ ImmutableList.of(intType), longType);
+ // TODO: Using 0 might be wrong; might need to walk down to find the
+ // proper index of a dummy.
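+ // Net effect: an Aggregate with an empty group set and no aggregate calls
+ // (a constant-only select over a group-by-nothing) is rewritten into a dummy
+ // count(column 0) so that AST conversion sees a well-formed aggregate.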
+ List argList = ImmutableList.of(0); + AggregateCall dummyCall = new AggregateCall(countFn, false, argList, longType, null); + Aggregate newAggRel = oldAggRel.copy(oldAggRel.getTraitSet(), oldAggRel.getInput(), + oldAggRel.indicator, oldAggRel.getGroupSet(), oldAggRel.getGroupSets(), + ImmutableList.of(dummyCall)); + RelNode select = introduceDerivedTable(newAggRel); + parent.replaceInput(0, select); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java new file mode 100644 index 0000000..9f786bb --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -0,0 +1,425 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.calcite.avatica.ByteString; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlCastFunction; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.parse.ParseUtils; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; +import com.google.common.collect.ImmutableMap; + +public class RexNodeConverter { + private static final Log LOG = LogFactory.getLog(RexNodeConverter.class); + + private static class InputCtx { + private final RelDataType calciteInpDataType; + private final ImmutableMap hiveNameToPosMap; + private final RowResolver hiveRR; + private final int offsetInCalciteSchema; + + private InputCtx(RelDataType calciteInpDataType, ImmutableMap hiveNameToPosMap, + RowResolver hiveRR, int offsetInCalciteSchema) { + this.calciteInpDataType = calciteInpDataType; + this.hiveNameToPosMap = hiveNameToPosMap; + this.hiveRR = hiveRR; + this.offsetInCalciteSchema = offsetInCalciteSchema; + } + }; + + private final RelOptCluster cluster; + private final ImmutableList inputCtxs; + private final boolean flattenExpr; + + public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, + ImmutableMap nameToPosMap, int offset, boolean flattenExpr) { + this.cluster = cluster; + this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, null, offset)); + this.flattenExpr = flattenExpr; + } + + public RexNodeConverter(RelOptCluster cluster, List inpCtxLst, boolean flattenExpr) { + this.cluster = cluster; + this.inputCtxs = ImmutableList. builder().addAll(inpCtxLst).build(); + this.flattenExpr = flattenExpr; + } + + public RexNode convert(ExprNodeDesc expr) throws SemanticException { + if (expr instanceof ExprNodeNullDesc) { + return createNullLiteral(expr); + } else if (expr instanceof ExprNodeGenericFuncDesc) { + return convert((ExprNodeGenericFuncDesc) expr); + } else if (expr instanceof ExprNodeConstantDesc) { + return convert((ExprNodeConstantDesc) expr); + } else if (expr instanceof ExprNodeColumnDesc) { + return convert((ExprNodeColumnDesc) expr); + } else if (expr instanceof ExprNodeFieldDesc) { + return convert((ExprNodeFieldDesc) expr); + } else { + throw new RuntimeException("Unsupported Expression"); + } + // TODO: handle ExprNodeColumnListDesc + } + + private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException { + RexNode rexNode = convert(fieldDesc.getDesc()); + if (rexNode instanceof RexCall) { + // regular case of accessing nested field in a column + return cluster.getRexBuilder().makeFieldAccess(rexNode, fieldDesc.getFieldName(), true); + } else { + // This may happen for schema-less tables, where columns are dynamically + // supplied by serdes. 
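+ // Throwing CalciteSemanticException aborts CBO for this query and falls back
+ // to the regular planner rather than guessing at a field offset.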
+ throw new CalciteSemanticException("Unexpected rexnode : " + + rexNode.getClass().getCanonicalName()); + } + } + + private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticException { + ExprNodeDesc tmpExprNode; + RexNode tmpRN; + + List childRexNodeLst = new LinkedList(); + Builder argTypeBldr = ImmutableList. builder(); + + // TODO: 1) Expand to other functions as needed 2) What about types other than primitive. + TypeInfo tgtDT = null; + GenericUDF tgtUdf = func.getGenericUDF(); + boolean isNumeric = tgtUdf instanceof GenericUDFBaseNumeric, + isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare; + if (isNumeric) { + tgtDT = func.getTypeInfo(); + + assert func.getChildren().size() == 2; + // TODO: checking 2 children is useless, compare already does that. + } else if (isCompare && (func.getChildren().size() == 2)) { + tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0) + .getTypeInfo(), func.getChildren().get(1).getTypeInfo()); + } + + + for (ExprNodeDesc childExpr : func.getChildren()) { + tmpExprNode = childExpr; + if (tgtDT != null + && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) { + if (isCompare) { + // For compare, we will convert requisite children + tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT); + } else if (isNumeric) { + // For numeric, we'll do minimum necessary cast - if we cast to the type + // of expression, bad things will happen. + GenericUDFBaseNumeric numericUdf = (GenericUDFBaseNumeric)tgtUdf; + PrimitiveTypeInfo minArgType = numericUdf.deriveMinArgumentCast(childExpr, tgtDT); + tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType); + } else { + throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare"); + } + + } + argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory())); + tmpRN = convert(tmpExprNode); + childRexNodeLst.add(tmpRN); + } + + // See if this is an explicit cast. + RexNode expr = null; + RelDataType retType = null; + expr = handleExplicitCast(func, childRexNodeLst); + + if (expr == null) { + // This is not a cast; process the function. 
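+ // Look up the corresponding Calcite operator: a standard operator where a
+ // mapping is registered, otherwise a synthesized SqlFunction; then build the
+ // RexCall over the converted children.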
+ retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
+ SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(),
+ func.getGenericUDF(), argTypeBldr.build(), retType);
+ expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst);
+ } else {
+ retType = expr.getType();
+ }
+
+ // TODO: The Calcite cast function has a bug where inferring the type of a
+ // cast throws an exception, so do not flatten casts here.
+ if (flattenExpr && (expr instanceof RexCall)
+ && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
+ RexCall call = (RexCall) expr;
+ expr = cluster.getRexBuilder().makeCall(retType, call.getOperator(),
+ RexUtil.flatten(call.getOperands(), call.getOperator()));
+ }
+
+ return expr;
+ }
+
+ private boolean castExprUsingUDFBridge(GenericUDF gUDF) {
+ boolean castExpr = false;
+ if (gUDF != null && gUDF instanceof GenericUDFBridge) {
+ String udfClassName = ((GenericUDFBridge) gUDF).getUdfClassName();
+ if (udfClassName != null) {
+ int sp = udfClassName.lastIndexOf('.');
+ // TODO: add a method to GenericUDFBridge to say whether it is a cast function
+ if (sp >= 0 && (sp + 1) < udfClassName.length()) {
+ udfClassName = udfClassName.substring(sp + 1);
+ if (udfClassName.equals("UDFToBoolean") || udfClassName.equals("UDFToByte")
+ || udfClassName.equals("UDFToDouble") || udfClassName.equals("UDFToInteger")
+ || udfClassName.equals("UDFToLong") || udfClassName.equals("UDFToShort")
+ || udfClassName.equals("UDFToFloat") || udfClassName.equals("UDFToString"))
+ castExpr = true;
+ }
+ }
+ }
+
+ return castExpr;
+ }
+
+ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List<RexNode> childRexNodeLst)
+ throws CalciteSemanticException {
+ RexNode castExpr = null;
+
+ if (childRexNodeLst != null && childRexNodeLst.size() == 1) {
+ GenericUDF udf = func.getGenericUDF();
+ if ((udf instanceof GenericUDFToChar) || (udf instanceof GenericUDFToVarchar)
+ || (udf instanceof GenericUDFToDecimal) || (udf instanceof GenericUDFToDate)
+ || (udf instanceof GenericUDFToBinary) || castExprUsingUDFBridge(udf)) {
+ castExpr = cluster.getRexBuilder().makeAbstractCast(
+ TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()),
+ childRexNodeLst.get(0));
+ }
+ }
+
+ return castExpr;
+ }
+
+ private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException {
+ InputCtx ctxLookingFor = null;
+
+ if (inputCtxs.size() == 1) {
+ ctxLookingFor = inputCtxs.get(0);
+ } else {
+ String tableAlias = col.getTabAlias();
+ String colAlias = col.getColumn();
+ int noInp = 0;
+ for (InputCtx ic : inputCtxs) {
+ if (tableAlias == null || ic.hiveRR.hasTableAlias(tableAlias)) {
+ if (ic.hiveRR.getPosition(colAlias) >= 0) {
+ ctxLookingFor = ic;
+ noInp++;
+ }
+ }
+ }
+
+ if (noInp > 1)
+ throw new RuntimeException("Ambiguous column mapping");
+ }
+
+ return ctxLookingFor;
+ }
+
+ protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException {
+ InputCtx ic = getInputCtx(col);
+ int pos = ic.hiveNameToPosMap.get(col.getColumn());
+ return cluster.getRexBuilder().makeInputRef(
+ ic.calciteInpDataType.getFieldList().get(pos).getType(), pos + ic.offsetInCalciteSchema);
+ }
+
+ private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE),
+ MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE);
+
+ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException {
+ RexBuilder rexBuilder = cluster.getRexBuilder();
+ RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
+ PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo)
literal.getTypeInfo(); + RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory); + + PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory(); + + ConstantObjectInspector coi = literal.getWritableObjectInspector(); + Object value = ObjectInspectorUtils.copyToStandardJavaObject(coi.getWritableConstantValue(), + coi); + + RexNode calciteLiteral = null; + // TODO: Verify if we need to use ConstantObjectInspector to unwrap data + switch (hiveTypeCategory) { + case BOOLEAN: + calciteLiteral = rexBuilder.makeLiteral(((Boolean) value).booleanValue()); + break; + case BYTE: + byte[] byteArray = new byte[] { (Byte) value }; + ByteString bs = new ByteString(byteArray); + calciteLiteral = rexBuilder.makeBinaryLiteral(bs); + break; + case SHORT: + calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Short) value), calciteDataType); + break; + case INT: + calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Integer) value)); + break; + case LONG: + calciteLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value)); + break; + // TODO: is Decimal an exact numeric or approximate numeric? + case DECIMAL: + if (value instanceof HiveDecimal) { + value = ((HiveDecimal) value).bigDecimalValue(); + } else if (value instanceof Decimal128) { + value = ((Decimal128) value).toBigDecimal(); + } + if (value == null) { + // We have found an invalid decimal value while enforcing precision and + // scale. Ideally, + // we would replace it with null here, which is what Hive does. However, + // we need to plumb + // this thru up somehow, because otherwise having different expression + // type in AST causes + // the plan generation to fail after CBO, probably due to some residual + // state in SA/QB. + // For now, we will not run CBO in the presence of invalid decimal + // literals. + throw new CalciteSemanticException("Expression " + literal.getExprString() + + " is not a valid decimal"); + // TODO: return createNullLiteral(literal); + } + BigDecimal bd = (BigDecimal) value; + BigInteger unscaled = bd.unscaledValue(); + if (unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0) { + calciteLiteral = rexBuilder.makeExactLiteral(bd); + } else { + // CBO doesn't support unlimited precision decimals. In practice, this + // will work... + // An alternative would be to throw CboSemanticException and fall back + // to no CBO. 
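+ // Synthesize a DECIMAL type wide enough for the literal's digits, since its
+ // unscaled value does not fit in a long.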
+ RelDataType relType = cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL, + bd.scale(), unscaled.toString().length()); + calciteLiteral = rexBuilder.makeExactLiteral(bd, relType); + } + break; + case FLOAT: + calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal((Float) value), calciteDataType); + break; + case DOUBLE: + calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal((Double) value), calciteDataType); + break; + case CHAR: + if (value instanceof HiveChar) + value = ((HiveChar) value).getValue(); + calciteLiteral = rexBuilder.makeLiteral((String) value); + break; + case VARCHAR: + if (value instanceof HiveVarchar) + value = ((HiveVarchar) value).getValue(); + calciteLiteral = rexBuilder.makeLiteral((String) value); + break; + case STRING: + calciteLiteral = rexBuilder.makeLiteral((String) value); + break; + case DATE: + Calendar cal = new GregorianCalendar(); + cal.setTime((Date) value); + calciteLiteral = rexBuilder.makeDateLiteral(cal); + break; + case TIMESTAMP: + Calendar c = null; + if (value instanceof Calendar) { + c = (Calendar)value; + } else { + c = Calendar.getInstance(); + c.setTimeInMillis(((Timestamp)value).getTime()); + } + calciteLiteral = rexBuilder.makeTimestampLiteral(c, RelDataType.PRECISION_NOT_SPECIFIED); + break; + case BINARY: + case VOID: + case UNKNOWN: + default: + throw new RuntimeException("UnSupported Literal"); + } + + return calciteLiteral; + } + + private RexNode createNullLiteral(ExprNodeDesc expr) throws CalciteSemanticException { + return cluster.getRexBuilder().makeNullLiteral( + TypeConverter.convert(expr.getTypeInfo(), cluster.getTypeFactory()).getSqlTypeName()); + } + + public static RexNode convert(RelOptCluster cluster, ExprNodeDesc joinCondnExprNode, + List inputRels, LinkedHashMap relToHiveRR, + Map> relToHiveColNameCalcitePosMap, boolean flattenExpr) + throws SemanticException { + List inputCtxLst = new ArrayList(); + + int offSet = 0; + for (RelNode r : inputRels) { + inputCtxLst.add(new InputCtx(r.getRowType(), relToHiveColNameCalcitePosMap.get(r), relToHiveRR + .get(r), offSet)); + offSet += r.getRowType().getFieldCount(); + } + + return (new RexNodeConverter(cluster, inputCtxLst, flattenExpr)).convert(joinCondnExprNode); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java new file mode 100644 index 0000000..6a4150c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -0,0 +1,402 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.lang.annotation.Annotation; +import java.util.List; +import java.util.Map; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlOperandTypeChecker; +import org.apache.calcite.sql.type.SqlOperandTypeInference; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.util.Util; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.ParseDriver; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.udf.SettableUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNegative; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPositive; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; + +public class SqlFunctionConverter { + private static final Log LOG = LogFactory.getLog(SqlFunctionConverter.class); + + static final Map hiveToCalcite; + static final Map calciteToHiveToken; + static final Map reverseOperatorMap; + + static { + StaticBlockBuilder builder = new StaticBlockBuilder(); + hiveToCalcite = ImmutableMap.copyOf(builder.hiveToCalcite); + calciteToHiveToken = ImmutableMap.copyOf(builder.calciteToHiveToken); + reverseOperatorMap = ImmutableMap.copyOf(builder.reverseOperatorMap); + } + + public static SqlOperator getCalciteOperator(String funcTextName, GenericUDF hiveUDF, + ImmutableList calciteArgTypes, RelDataType retType) throws CalciteSemanticException { + // handle overloaded methods first + if (hiveUDF instanceof GenericUDFOPNegative) { + return SqlStdOperatorTable.UNARY_MINUS; + } else if (hiveUDF instanceof GenericUDFOPPositive) { + return SqlStdOperatorTable.UNARY_PLUS; + } // do generic lookup + String name = null; + if (StringUtils.isEmpty(funcTextName)) { + name = getName(hiveUDF); // this should probably never happen, see getName + // comment + LOG.warn("The function text was empty, name from annotation is " + name); + } else { + // We could just do toLowerCase here and let SA 
qualify it, but let's be
+ // proper...
+ name = FunctionRegistry.getNormalizedFunctionName(funcTextName);
+ }
+ return getCalciteFn(name, calciteArgTypes, retType);
+ }
+
+ public static GenericUDF getHiveUDF(SqlOperator op, RelDataType dt, int argsLength) {
+ String name = reverseOperatorMap.get(op);
+ if (name == null) {
+ name = op.getName();
+ }
+ // Make sure we handle unary + and - correctly.
+ if (argsLength == 1) {
+ if (name.equals("+")) {
+ name = FunctionRegistry.UNARY_PLUS_FUNC_NAME;
+ } else if (name.equals("-")) {
+ name = FunctionRegistry.UNARY_MINUS_FUNC_NAME;
+ }
+ }
+ FunctionInfo hFn;
+ try {
+ hFn = name != null ? FunctionRegistry.getFunctionInfo(name) : null;
+ } catch (SemanticException e) {
+ LOG.warn("Failed to load udf " + name, e);
+ hFn = null;
+ }
+ if (hFn == null) {
+ try {
+ hFn = handleExplicitCast(op, dt);
+ } catch (SemanticException e) {
+ LOG.warn("Failed to load udf " + name, e);
+ hFn = null;
+ }
+ }
+ return hFn == null ? null : hFn.getGenericUDF();
+ }
+
+ private static FunctionInfo handleExplicitCast(SqlOperator op, RelDataType dt) throws SemanticException {
+ FunctionInfo castUDF = null;
+
+ if (op.kind == SqlKind.CAST) {
+ TypeInfo castType = TypeConverter.convert(dt);
+
+ if (castType.equals(TypeInfoFactory.byteTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("tinyint");
+ } else if (castType instanceof CharTypeInfo) {
+ castUDF = handleCastForParameterizedType(castType, FunctionRegistry.getFunctionInfo("char"));
+ } else if (castType instanceof VarcharTypeInfo) {
+ castUDF = handleCastForParameterizedType(castType,
+ FunctionRegistry.getFunctionInfo("varchar"));
+ } else if (castType.equals(TypeInfoFactory.stringTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("string");
+ } else if (castType.equals(TypeInfoFactory.booleanTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("boolean");
+ } else if (castType.equals(TypeInfoFactory.shortTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("smallint");
+ } else if (castType.equals(TypeInfoFactory.intTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("int");
+ } else if (castType.equals(TypeInfoFactory.longTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("bigint");
+ } else if (castType.equals(TypeInfoFactory.floatTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("float");
+ } else if (castType.equals(TypeInfoFactory.doubleTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("double");
+ } else if (castType.equals(TypeInfoFactory.timestampTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("timestamp");
+ } else if (castType.equals(TypeInfoFactory.dateTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("date");
+ } else if (castType instanceof DecimalTypeInfo) {
+ castUDF = handleCastForParameterizedType(castType,
+ FunctionRegistry.getFunctionInfo("decimal"));
+ } else if (castType.equals(TypeInfoFactory.binaryTypeInfo)) {
+ castUDF = FunctionRegistry.getFunctionInfo("binary");
+ } else
+ throw new IllegalStateException("Unexpected type : " + castType.getQualifiedName());
+ }
+
+ return castUDF;
+ }
+
+ private static FunctionInfo handleCastForParameterizedType(TypeInfo ti, FunctionInfo fi) {
+ SettableUDF udf = (SettableUDF) fi.getGenericUDF();
+ try {
+ udf.setTypeInfo(ti);
+ } catch (UDFArgumentException e) {
+ throw new RuntimeException(e);
+ }
+ return new FunctionInfo(fi.isNative(), fi.getDisplayName(), (GenericUDF) udf);
+ }
+
+ // TODO: 1) handle Agg Func Name translation 2) is it correct to add func args
+ // as child of func?
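+ // Known operators map back onto their Hive grammar token (e.g. KW_AND);
+ // anything else becomes a generic TOK_FUNCTION node whose first child is the
+ // operator name.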
+ public static ASTNode buildAST(SqlOperator op, List children) { + HiveToken hToken = calciteToHiveToken.get(op); + ASTNode node; + if (hToken != null) { + node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text); + } else { + node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); + if (op.kind != SqlKind.CAST) { + if (op.kind == SqlKind.MINUS_PREFIX) { + node = (ASTNode) ParseDriver.adaptor.create(HiveParser.MINUS, "MINUS"); + } else if (op.kind == SqlKind.PLUS_PREFIX) { + node = (ASTNode) ParseDriver.adaptor.create(HiveParser.PLUS, "PLUS"); + } else { + if (op.getName().toUpperCase().equals(SqlStdOperatorTable.COUNT.getName()) + && children.size() == 0) { + node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONSTAR, + "TOK_FUNCTIONSTAR"); + } + node.addChild((ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, op.getName())); + } + } + } + + for (ASTNode c : children) { + ParseDriver.adaptor.addChild(node, c); + } + return node; + } + + /** + * Build AST for flattened Associative expressions ('and', 'or'). Flattened + * expressions is of the form or[x,y,z] which is originally represented as + * "or[x, or[y, z]]". + */ + public static ASTNode buildAST(SqlOperator op, List children, int i) { + if (i + 1 < children.size()) { + HiveToken hToken = calciteToHiveToken.get(op); + ASTNode curNode = ((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text)); + ParseDriver.adaptor.addChild(curNode, children.get(i)); + ParseDriver.adaptor.addChild(curNode, buildAST(op, children, i + 1)); + return curNode; + } else { + return children.get(i); + } + + } + + // TODO: this is not valid. Function names for built-in UDFs are specified in + // FunctionRegistry, + // and only happen to match annotations. For user UDFs, the name is what user + // specifies at + // creation time (annotation can be absent, different, or duplicate some other + // function). + private static String getName(GenericUDF hiveUDF) { + String udfName = null; + if (hiveUDF instanceof GenericUDFBridge) { + udfName = ((GenericUDFBridge) hiveUDF).getUdfName(); + } else { + Class udfClass = hiveUDF.getClass(); + Annotation udfAnnotation = udfClass.getAnnotation(Description.class); + + if (udfAnnotation != null && udfAnnotation instanceof Description) { + Description udfDescription = (Description) udfAnnotation; + udfName = udfDescription.name(); + if (udfName != null) { + String[] aliases = udfName.split(","); + if (aliases.length > 0) + udfName = aliases[0]; + } + } + + if (udfName == null || udfName.isEmpty()) { + udfName = hiveUDF.getClass().getName(); + int indx = udfName.lastIndexOf("."); + if (indx >= 0) { + indx += 1; + udfName = udfName.substring(indx); + } + } + } + + return udfName; + } + + /** This class is used to build immutable hashmaps in the static block above. 
+
+  /** This class is used to build immutable hash maps in the static block above. */
+  private static class StaticBlockBuilder {
+    final Map<String, SqlOperator> hiveToCalcite = Maps.newHashMap();
+    final Map<SqlOperator, HiveToken> calciteToHiveToken = Maps.newHashMap();
+    final Map<SqlOperator, String> reverseOperatorMap = Maps.newHashMap();
+
+    StaticBlockBuilder() {
+      registerFunction("+", SqlStdOperatorTable.PLUS, hToken(HiveParser.PLUS, "+"));
+      registerFunction("-", SqlStdOperatorTable.MINUS, hToken(HiveParser.MINUS, "-"));
+      registerFunction("*", SqlStdOperatorTable.MULTIPLY, hToken(HiveParser.STAR, "*"));
+      registerFunction("/", SqlStdOperatorTable.DIVIDE, hToken(HiveParser.STAR, "/"));
+      registerFunction("%", SqlStdOperatorTable.MOD, hToken(HiveParser.STAR, "%"));
+      registerFunction("and", SqlStdOperatorTable.AND, hToken(HiveParser.KW_AND, "and"));
+      registerFunction("or", SqlStdOperatorTable.OR, hToken(HiveParser.KW_OR, "or"));
+      registerFunction("=", SqlStdOperatorTable.EQUALS, hToken(HiveParser.EQUAL, "="));
+      registerFunction("<", SqlStdOperatorTable.LESS_THAN, hToken(HiveParser.LESSTHAN, "<"));
+      registerFunction("<=", SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
+          hToken(HiveParser.LESSTHANOREQUALTO, "<="));
+      registerFunction(">", SqlStdOperatorTable.GREATER_THAN, hToken(HiveParser.GREATERTHAN, ">"));
+      registerFunction(">=", SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
+          hToken(HiveParser.GREATERTHANOREQUALTO, ">="));
+      registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not"));
+      registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>"));
+    }
+
+    private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
+      reverseOperatorMap.put(calciteFn, name);
+      FunctionInfo hFn;
+      try {
+        hFn = FunctionRegistry.getFunctionInfo(name);
+      } catch (SemanticException e) {
+        LOG.warn("Failed to load udf " + name, e);
+        hFn = null;
+      }
+      if (hFn != null) {
+        String hFnName = getName(hFn.getGenericUDF());
+        hiveToCalcite.put(hFnName, calciteFn);
+
+        if (hiveToken != null) {
+          calciteToHiveToken.put(calciteFn, hiveToken);
+        }
+      }
+    }
+  }
+
+  private static HiveToken hToken(int type, String text) {
+    return new HiveToken(type, text);
+  }
+
+  public static class CalciteUDAF extends SqlAggFunction {
+    public CalciteUDAF(String opName, SqlReturnTypeInference returnTypeInference,
+        SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker,
+        ImmutableList<RelDataType> argTypes, RelDataType retType) {
+      super(opName, SqlKind.OTHER_FUNCTION, returnTypeInference, operandTypeInference,
+          operandTypeChecker, SqlFunctionCategory.USER_DEFINED_FUNCTION);
+    }
+  }
+
+  private static class CalciteUDFInfo {
+    private String udfName;
+    private SqlReturnTypeInference returnTypeInference;
+    private SqlOperandTypeInference operandTypeInference;
+    private SqlOperandTypeChecker operandTypeChecker;
+    private ImmutableList<RelDataType> argTypes;
+    private RelDataType retType;
+  }
+
+  private static CalciteUDFInfo getUDFInfo(String hiveUdfName,
+      ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType) {
+    CalciteUDFInfo udfInfo = new CalciteUDFInfo();
+    udfInfo.udfName = hiveUdfName;
+    udfInfo.returnTypeInference = ReturnTypes.explicit(calciteRetType);
+    udfInfo.operandTypeInference = InferTypes.explicit(calciteArgTypes);
+    ImmutableList.Builder<SqlTypeFamily> typeFamilyBuilder = new ImmutableList.Builder<SqlTypeFamily>();
+    for (RelDataType at : calciteArgTypes) {
+      typeFamilyBuilder.add(Util.first(at.getSqlTypeName().getFamily(), SqlTypeFamily.ANY));
+    }
+    udfInfo.operandTypeChecker = OperandTypes.family(typeFamilyBuilder.build());
+
+    udfInfo.argTypes = ImmutableList.<RelDataType> copyOf(calciteArgTypes);
+    udfInfo.retType = calciteRetType;
+
+    return udfInfo;
+  }
+
+  public static SqlOperator getCalciteFn(String hiveUdfName,
+      ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType)
+      throws CalciteSemanticException {
+
+    if (hiveUdfName != null && hiveUdfName.trim().equals("<=>")) {
+      // We could map this to Calcite's IS_DISTINCT_FROM operator, but since
+      // our join-reordering algorithm can't handle it anyway there is no
+      // advantage. So, bail out for now.
+      throw new CalciteSemanticException("<=> is not yet supported for cbo.");
+    }
+    SqlOperator calciteOp = hiveToCalcite.get(hiveUdfName);
+    if (calciteOp == null) {
+      CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType);
+      calciteOp = new SqlFunction(uInf.udfName, SqlKind.OTHER_FUNCTION, uInf.returnTypeInference,
+          uInf.operandTypeInference, uInf.operandTypeChecker,
+          SqlFunctionCategory.USER_DEFINED_FUNCTION);
+    }
+
+    return calciteOp;
+  }
+
+  public static SqlAggFunction getCalciteAggFn(String hiveUdfName,
+      ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType) {
+    SqlAggFunction calciteAggFn = (SqlAggFunction) hiveToCalcite.get(hiveUdfName);
+    if (calciteAggFn == null) {
+      CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType);
+
+      calciteAggFn = new CalciteUDAF(uInf.udfName, uInf.returnTypeInference,
+          uInf.operandTypeInference, uInf.operandTypeChecker, uInf.argTypes, uInf.retType);
+    }
+
+    return calciteAggFn;
+  }
+
+  static class HiveToken {
+    int type;
+    String text;
+    String[] args;
+
+    HiveToken(int type, String text, String... args) {
+      this.type = type;
+      this.text = text;
+      this.args = args;
+    }
+  }
+}
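For reference, a sketch of registering an otherwise-unknown Hive UDF as a
Calcite operator (editorial, not part of this patch; the name "my_udf" and
the typeFactory are hypothetical):

    // Built-ins wired up by StaticBlockBuilder (e.g. "+") come back as the
    // corresponding SqlStdOperatorTable operator; anything else is wrapped
    // in a fresh SqlFunction with explicit return/operand type inference.
    RelDataType intTy = typeFactory.createSqlType(SqlTypeName.INTEGER);
    SqlOperator myUdf = SqlFunctionConverter.getCalciteFn("my_udf",
        ImmutableList.of(intTy, intTy), intTy);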
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java
new file mode 100644
index 0000000..88c989f
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java
@@ -0,0 +1,327 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMap.Builder;
+import com.google.common.collect.Lists;
+
+public class TypeConverter {
+  private static final Map<String, HiveToken> calciteToHiveTypeNameMap;
+
+  // TODO: Handling of char[], varchar[], string...
+  static {
+    Builder<String, HiveToken> b = ImmutableMap.builder();
+    b.put(SqlTypeName.BOOLEAN.getName(), new HiveToken(HiveParser.TOK_BOOLEAN, "TOK_BOOLEAN"));
+    b.put(SqlTypeName.TINYINT.getName(), new HiveToken(HiveParser.TOK_TINYINT, "TOK_TINYINT"));
+    b.put(SqlTypeName.SMALLINT.getName(), new HiveToken(HiveParser.TOK_SMALLINT, "TOK_SMALLINT"));
+    b.put(SqlTypeName.INTEGER.getName(), new HiveToken(HiveParser.TOK_INT, "TOK_INT"));
+    b.put(SqlTypeName.BIGINT.getName(), new HiveToken(HiveParser.TOK_BIGINT, "TOK_BIGINT"));
+    b.put(SqlTypeName.FLOAT.getName(), new HiveToken(HiveParser.TOK_FLOAT, "TOK_FLOAT"));
+    b.put(SqlTypeName.DOUBLE.getName(), new HiveToken(HiveParser.TOK_DOUBLE, "TOK_DOUBLE"));
+    b.put(SqlTypeName.DATE.getName(), new HiveToken(HiveParser.TOK_DATE, "TOK_DATE"));
+    b.put(SqlTypeName.TIMESTAMP.getName(), new HiveToken(HiveParser.TOK_TIMESTAMP, "TOK_TIMESTAMP"));
+    b.put(SqlTypeName.BINARY.getName(), new HiveToken(HiveParser.TOK_BINARY, "TOK_BINARY"));
+    calciteToHiveTypeNameMap = b.build();
+  }
+
+  /*********************** Convert Hive Types To Calcite Types ***********************/
+  public static RelDataType getType(RelOptCluster cluster,
+      List<ColumnInfo> cInfoLst) throws CalciteSemanticException {
+    RexBuilder rexBuilder = cluster.getRexBuilder();
+    RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
+    List<RelDataType> fieldTypes = new LinkedList<RelDataType>();
+    List<String> fieldNames = new LinkedList<String>();
+
+    for (ColumnInfo ci : cInfoLst) {
+      fieldTypes.add(convert(ci.getType(), dtFactory));
+      fieldNames.add(ci.getInternalName());
+    }
+    return dtFactory.createStructType(fieldTypes, fieldNames);
+  }
+
+  public static RelDataType getType(RelOptCluster cluster, RowResolver rr,
+      List<String> neededCols) throws CalciteSemanticException {
+    RexBuilder rexBuilder = cluster.getRexBuilder();
+    RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
+    RowSchema rs = rr.getRowSchema();
+    List<RelDataType> fieldTypes = new LinkedList<RelDataType>();
+    List<String> fieldNames = new LinkedList<String>();
+
+    for (ColumnInfo ci : rs.getSignature()) {
+      if (neededCols == null || neededCols.contains(ci.getInternalName())) {
+        fieldTypes.add(convert(ci.getType(), dtFactory));
+        fieldNames.add(ci.getInternalName());
+      }
+    }
+    return dtFactory.createStructType(fieldTypes, fieldNames);
+  }
+
+  public static RelDataType convert(TypeInfo type, RelDataTypeFactory dtFactory)
+      throws CalciteSemanticException {
+    RelDataType convertedType = null;
+
+    switch (type.getCategory()) {
+    case PRIMITIVE:
+      convertedType = convert((PrimitiveTypeInfo) type, dtFactory);
+      break;
+    case LIST:
+      convertedType = convert((ListTypeInfo) type, dtFactory);
+      break;
+    case MAP:
+      convertedType = convert((MapTypeInfo) type, dtFactory);
+      break;
+    case STRUCT:
+      convertedType = convert((StructTypeInfo) type, dtFactory);
+      break;
+    case UNION:
+      convertedType = convert((UnionTypeInfo) type, dtFactory);
+      break;
+    }
+    return convertedType;
+  }
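A small example of the Hive-to-Calcite direction (editorial, not part of this
patch; dtFactory stands for any RelDataTypeFactory, and the call site must
handle CalciteSemanticException):

    // decimal(10,2) -> DECIMAL(10, 2); the primitive branch below wraps
    // every result with nullability = true, matching Hive's
    // nullable-by-default column semantics.
    RelDataType decRel = TypeConverter.convert(
        TypeInfoFactory.getDecimalTypeInfo(10, 2), dtFactory);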
+
+  public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) {
+    RelDataType convertedType = null;
+
+    switch (type.getPrimitiveCategory()) {
+    case VOID:
+      convertedType = dtFactory.createSqlType(SqlTypeName.NULL);
+      break;
+    case BOOLEAN:
+      convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN);
+      break;
+    case BYTE:
+      convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT);
+      break;
+    case SHORT:
+      convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT);
+      break;
+    case INT:
+      convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER);
+      break;
+    case LONG:
+      convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT);
+      break;
+    case FLOAT:
+      convertedType = dtFactory.createSqlType(SqlTypeName.FLOAT);
+      break;
+    case DOUBLE:
+      convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE);
+      break;
+    case STRING:
+      convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE);
+      break;
+    case DATE:
+      convertedType = dtFactory.createSqlType(SqlTypeName.DATE);
+      break;
+    case TIMESTAMP:
+      convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP);
+      break;
+    case BINARY:
+      convertedType = dtFactory.createSqlType(SqlTypeName.BINARY);
+      break;
+    case DECIMAL:
+      DecimalTypeInfo dtInf = (DecimalTypeInfo) type;
+      convertedType = dtFactory
+          .createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale());
+      break;
+    case VARCHAR:
+      convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR,
+          ((BaseCharTypeInfo) type).getLength());
+      break;
+    case CHAR:
+      convertedType = dtFactory.createSqlType(SqlTypeName.CHAR,
+          ((BaseCharTypeInfo) type).getLength());
+      break;
+    case UNKNOWN:
+      convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
+      break;
+    }
+
+    if (null == convertedType) {
+      throw new RuntimeException("Unsupported Type : " + type.getTypeName());
+    }
+
+    return dtFactory.createTypeWithNullability(convertedType, true);
+  }
+
+  public static RelDataType convert(ListTypeInfo lstType,
+      RelDataTypeFactory dtFactory) throws CalciteSemanticException {
+    RelDataType elemType = convert(lstType.getListElementTypeInfo(), dtFactory);
+    return dtFactory.createArrayType(elemType, -1);
+  }
+
+  public static RelDataType convert(MapTypeInfo mapType, RelDataTypeFactory dtFactory)
+      throws CalciteSemanticException {
+    RelDataType keyType = convert(mapType.getMapKeyTypeInfo(), dtFactory);
+    RelDataType valueType = convert(mapType.getMapValueTypeInfo(), dtFactory);
+    return dtFactory.createMapType(keyType, valueType);
+  }
+
+  public static RelDataType convert(StructTypeInfo structType,
+      final RelDataTypeFactory dtFactory) throws CalciteSemanticException {
+    List<RelDataType> fTypes = new ArrayList<RelDataType>(structType.getAllStructFieldTypeInfos().size());
+    for (TypeInfo ti : structType.getAllStructFieldTypeInfos()) {
+      fTypes.add(convert(ti, dtFactory));
+    }
+    return dtFactory.createStructType(fTypes, structType.getAllStructFieldNames());
+  }
+
+  public static RelDataType convert(UnionTypeInfo unionType, RelDataTypeFactory dtFactory)
+      throws CalciteSemanticException {
+    // Union type is not supported in Calcite.
+    throw new CalciteSemanticException("Union type is not supported");
+  }
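Nested types compose through the recursive cases above; a sketch (editorial,
not part of this patch, same dtFactory assumption):

    // array<struct<a:int,b:string>> becomes an ARRAY over a two-field struct
    // row type; nullability is applied at each primitive leaf.
    TypeInfo nested = TypeInfoFactory.getListTypeInfo(
        TypeInfoFactory.getStructTypeInfo(
            Lists.newArrayList("a", "b"),
            Lists.<TypeInfo> newArrayList(TypeInfoFactory.intTypeInfo,
                TypeInfoFactory.stringTypeInfo)));
    RelDataType nestedRel = TypeConverter.convert(nested, dtFactory);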
+
+  /*********************** Convert Calcite Types To Hive Types ***********************/
+  public static TypeInfo convert(RelDataType rType) {
+    if (rType.isStruct()) {
+      return convertStructType(rType);
+    } else if (rType.getComponentType() != null) {
+      return convertListType(rType);
+    } else if (rType.getKeyType() != null) {
+      return convertMapType(rType);
+    } else {
+      return convertPrimtiveType(rType);
+    }
+  }
+
+  public static TypeInfo convertStructType(RelDataType rType) {
+    List<TypeInfo> fTypes = Lists.transform(rType.getFieldList(),
+        new Function<RelDataTypeField, TypeInfo>() {
+          @Override
+          public TypeInfo apply(RelDataTypeField f) {
+            return convert(f.getType());
+          }
+        });
+    List<String> fNames = Lists.transform(rType.getFieldList(),
+        new Function<RelDataTypeField, String>() {
+          @Override
+          public String apply(RelDataTypeField f) {
+            return f.getName();
+          }
+        });
+    return TypeInfoFactory.getStructTypeInfo(fNames, fTypes);
+  }
+
+  public static TypeInfo convertMapType(RelDataType rType) {
+    return TypeInfoFactory.getMapTypeInfo(convert(rType.getKeyType()),
+        convert(rType.getValueType()));
+  }
+
+  public static TypeInfo convertListType(RelDataType rType) {
+    return TypeInfoFactory.getListTypeInfo(convert(rType.getComponentType()));
+  }
+
+  public static TypeInfo convertPrimtiveType(RelDataType rType) {
+    switch (rType.getSqlTypeName()) {
+    case BOOLEAN:
+      return TypeInfoFactory.booleanTypeInfo;
+    case TINYINT:
+      return TypeInfoFactory.byteTypeInfo;
+    case SMALLINT:
+      return TypeInfoFactory.shortTypeInfo;
+    case INTEGER:
+      return TypeInfoFactory.intTypeInfo;
+    case BIGINT:
+      return TypeInfoFactory.longTypeInfo;
+    case FLOAT:
+      return TypeInfoFactory.floatTypeInfo;
+    case DOUBLE:
+      return TypeInfoFactory.doubleTypeInfo;
+    case DATE:
+      return TypeInfoFactory.dateTypeInfo;
+    case TIMESTAMP:
+      return TypeInfoFactory.timestampTypeInfo;
+    case BINARY:
+      return TypeInfoFactory.binaryTypeInfo;
+    case DECIMAL:
+      return TypeInfoFactory.getDecimalTypeInfo(rType.getPrecision(), rType.getScale());
+    case VARCHAR:
+      if (rType.getPrecision() == Integer.MAX_VALUE) {
+        return TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
+      } else {
+        return TypeInfoFactory.getVarcharTypeInfo(rType.getPrecision());
+      }
+    case CHAR:
+      return TypeInfoFactory.getCharTypeInfo(rType.getPrecision());
+    case OTHER:
+    default:
+      return TypeInfoFactory.voidTypeInfo;
+    }
+  }
+
+  public static HiveToken hiveToken(RelDataType calciteType) {
+    HiveToken ht = null;
+
+    switch (calciteType.getSqlTypeName()) {
+    case CHAR: {
+      ht = new HiveToken(HiveParser.TOK_CHAR, "TOK_CHAR", String.valueOf(calciteType.getPrecision()));
+    }
+      break;
+    case VARCHAR: {
+      if (calciteType.getPrecision() == Integer.MAX_VALUE) {
+        ht = new HiveToken(HiveParser.TOK_STRING, "TOK_STRING", String.valueOf(calciteType
+            .getPrecision()));
+      } else {
+        ht = new HiveToken(HiveParser.TOK_VARCHAR, "TOK_VARCHAR", String.valueOf(calciteType
+            .getPrecision()));
+      }
+    }
+      break;
+    case DECIMAL: {
+      ht = new HiveToken(HiveParser.TOK_DECIMAL, "TOK_DECIMAL", String.valueOf(calciteType
+          .getPrecision()), String.valueOf(calciteType.getScale()));
+    }
+      break;
+    default:
+      ht = calciteToHiveTypeNameMap.get(calciteType.getSqlTypeName().getName());
+    }
+
+    return ht;
+  }
+}
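And a round trip tying the two directions together (editorial, not part of
this patch; same dtFactory assumption):

    // varchar(20) -> VARCHAR(20) -> varchar(20). A VARCHAR whose precision
    // is Integer.MAX_VALUE deliberately comes back as Hive's plain string.
    RelDataType vc = TypeConverter.convert(TypeInfoFactory.getVarcharTypeInfo(20), dtFactory);
    TypeInfo roundTripped = TypeConverter.convert(vc);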
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java
deleted file mode 100644
index e9e052f..0000000
---
ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq; - -import com.google.common.collect.ImmutableList; - -import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdDistinctRowCount; -import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdRowCount; -import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdSelectivity; -import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdUniqueKeys; -import org.eigenbase.rel.metadata.ChainedRelMetadataProvider; -import org.eigenbase.rel.metadata.DefaultRelMetadataProvider; -import org.eigenbase.rel.metadata.RelMetadataProvider; - -public class HiveDefaultRelMetadataProvider { - private HiveDefaultRelMetadataProvider() { - } - - public static final RelMetadataProvider INSTANCE = ChainedRelMetadataProvider.of(ImmutableList - .of(HiveRelMdDistinctRowCount.SOURCE, - HiveRelMdSelectivity.SOURCE, - HiveRelMdRowCount.SOURCE, - HiveRelMdUniqueKeys.SOURCE, - new DefaultRelMetadataProvider())); -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java deleted file mode 100644 index 80f657e..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java +++ /dev/null @@ -1,530 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq; - -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Map.Entry; - -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.eigenbase.rel.RelFactories.ProjectFactory; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.SortRel; -import org.eigenbase.relopt.RelOptUtil; -import org.eigenbase.relopt.RelOptUtil.InputReferencedVisitor; -import org.eigenbase.reltype.RelDataTypeField; -import org.eigenbase.rex.RexBuilder; -import org.eigenbase.rex.RexInputRef; -import org.eigenbase.rex.RexNode; -import org.eigenbase.sql.SqlKind; -import org.eigenbase.sql.fun.SqlStdOperatorTable; -import org.eigenbase.sql.validate.SqlValidatorUtil; -import org.eigenbase.util.Pair; - -import com.google.common.base.Function; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Lists; - -/** - * Generic utility functions needed for Optiq based Hive CBO. - */ - -public class HiveOptiqUtil { - - /** - * Get list of virtual columns from the given list of projections. - *

- * - * @param exps - * list of rex nodes representing projections - * @return List of Virtual Columns, will not be null. - */ - public static List getVirtualCols(List exps) { - List vCols = new ArrayList(); - - for (int i = 0; i < exps.size(); i++) { - if (!(exps.get(i) instanceof RexInputRef)) { - vCols.add(i); - } - } - - return vCols; - } - - public static boolean validateASTForUnsupportedTokens(ASTNode ast) { - String astTree = ast.toStringTree(); - // if any of following tokens are present in AST, bail out - String[] tokens = { "TOK_CHARSETLITERAL","TOK_TABLESPLITSAMPLE" }; - for (String token : tokens) { - if (astTree.contains(token)) { - return false; - } - } - return true; - } - - public static List getProjsFromBelowAsInputRef(final RelNode rel) { - List projectList = Lists.transform(rel.getRowType().getFieldList(), - new Function() { - @Override - public RexNode apply(RelDataTypeField field) { - return rel.getCluster().getRexBuilder().makeInputRef(field.getType(), field.getIndex()); - } - }); - return projectList; - } - - public static List translateBitSetToProjIndx(BitSet projBitSet) { - List projIndxLst = new ArrayList(); - - for (int i = 0; i < projBitSet.length(); i++) { - if (projBitSet.get(i)) { - projIndxLst.add(i); - } - } - - return projIndxLst; - } - - /** - * Push any equi join conditions that are not column references as Projections - * on top of the children. - * - * @param factory - * Project factory to use. - * @param inputRels - * inputs to a join - * @param leftJoinKeys - * expressions for LHS of join key - * @param rightJoinKeys - * expressions for RHS of join key - * @param systemColCount - * number of system columns, usually zero. These columns are - * projected at the leading edge of the output row. - * @param leftKeys - * on return this contains the join key positions from the new - * project rel on the LHS. - * @param rightKeys - * on return this contains the join key positions from the new - * project rel on the RHS. - * @return the join condition after the equi expressions pushed down. 
- */ - public static RexNode projectNonColumnEquiConditions(ProjectFactory factory, RelNode[] inputRels, - List leftJoinKeys, List rightJoinKeys, int systemColCount, - List leftKeys, List rightKeys) { - RelNode leftRel = inputRels[0]; - RelNode rightRel = inputRels[1]; - RexBuilder rexBuilder = leftRel.getCluster().getRexBuilder(); - RexNode outJoinCond = null; - - int origLeftInputSize = leftRel.getRowType().getFieldCount(); - int origRightInputSize = rightRel.getRowType().getFieldCount(); - - List newLeftFields = new ArrayList(); - List newLeftFieldNames = new ArrayList(); - - List newRightFields = new ArrayList(); - List newRightFieldNames = new ArrayList(); - int leftKeyCount = leftJoinKeys.size(); - int i; - - for (i = 0; i < origLeftInputSize; i++) { - final RelDataTypeField field = leftRel.getRowType().getFieldList().get(i); - newLeftFields.add(rexBuilder.makeInputRef(field.getType(), i)); - newLeftFieldNames.add(field.getName()); - } - - for (i = 0; i < origRightInputSize; i++) { - final RelDataTypeField field = rightRel.getRowType().getFieldList().get(i); - newRightFields.add(rexBuilder.makeInputRef(field.getType(), i)); - newRightFieldNames.add(field.getName()); - } - - int newKeyCount = 0; - List> origColEqConds = new ArrayList>(); - for (i = 0; i < leftKeyCount; i++) { - RexNode leftKey = leftJoinKeys.get(i); - RexNode rightKey = rightJoinKeys.get(i); - - if (leftKey instanceof RexInputRef && rightKey instanceof RexInputRef) { - origColEqConds.add(Pair.of(((RexInputRef) leftKey).getIndex(), - ((RexInputRef) rightKey).getIndex())); - } else { - newLeftFields.add(leftKey); - newLeftFieldNames.add(null); - newRightFields.add(rightKey); - newRightFieldNames.add(null); - newKeyCount++; - } - } - - for (i = 0; i < origColEqConds.size(); i++) { - Pair p = origColEqConds.get(i); - RexNode leftKey = leftJoinKeys.get(i); - RexNode rightKey = rightJoinKeys.get(i); - leftKeys.add(p.left); - rightKeys.add(p.right); - RexNode cond = rexBuilder.makeCall( - SqlStdOperatorTable.EQUALS, - rexBuilder.makeInputRef(leftKey.getType(), systemColCount + p.left), - rexBuilder.makeInputRef(rightKey.getType(), systemColCount + origLeftInputSize - + newKeyCount + p.right)); - if (outJoinCond == null) { - outJoinCond = cond; - } else { - outJoinCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, outJoinCond, cond); - } - } - - if (newKeyCount == 0) { - return outJoinCond; - } - - int newLeftOffset = systemColCount + origLeftInputSize; - int newRightOffset = systemColCount + origLeftInputSize + origRightInputSize + newKeyCount; - for (i = 0; i < newKeyCount; i++) { - leftKeys.add(origLeftInputSize + i); - rightKeys.add(origRightInputSize + i); - RexNode cond = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, - rexBuilder.makeInputRef(newLeftFields.get(i).getType(), newLeftOffset + i), - rexBuilder.makeInputRef(newLeftFields.get(i).getType(), newRightOffset + i)); - if (outJoinCond == null) { - outJoinCond = cond; - } else { - outJoinCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, outJoinCond, cond); - } - } - - // added project if need to produce new keys than the original input - // fields - if (newKeyCount > 0) { - leftRel = factory.createProject(leftRel, newLeftFields, - SqlValidatorUtil.uniquify(newLeftFieldNames)); - rightRel = factory.createProject(rightRel, newRightFields, - SqlValidatorUtil.uniquify(newRightFieldNames)); - } - - inputRels[0] = leftRel; - inputRels[1] = rightRel; - - return outJoinCond; - } - - /** - * JoinPredicateInfo represents Join condition; JoinPredicate Info uses - * 
JoinLeafPredicateInfo to represent individual conjunctive elements in the - * predicate.
- * JoinPredicateInfo = JoinLeafPredicateInfo1 and JoinLeafPredicateInfo2...
- *

- * JoinPredicateInfo:
- * 1. preserves the order of conjuctive elements for - * equi-join(equiJoinPredicateElements)
- * 2. Stores set of projection indexes from left and right child which is part - * of equi join keys; the indexes are both in child and Join node schema.
- * 3. Keeps a map of projection indexes that are part of join keys to list of - * conjuctive elements(JoinLeafPredicateInfo) that uses them. - * - */ - public static class JoinPredicateInfo { - private final ImmutableList nonEquiJoinPredicateElements; - private final ImmutableList equiJoinPredicateElements; - private final ImmutableSet projsFromLeftPartOfJoinKeysInChildSchema; - private final ImmutableSet projsFromRightPartOfJoinKeysInChildSchema; - private final ImmutableSet projsFromRightPartOfJoinKeysInJoinSchema; - private final ImmutableMap> mapOfProjIndxInJoinSchemaToLeafPInfo; - - public JoinPredicateInfo(List nonEquiJoinPredicateElements, - List equiJoinPredicateElements, - Set projsFromLeftPartOfJoinKeysInChildSchema, - Set projsFromRightPartOfJoinKeysInChildSchema, - Set projsFromRightPartOfJoinKeysInJoinSchema, - Map> mapOfProjIndxInJoinSchemaToLeafPInfo) { - this.nonEquiJoinPredicateElements = ImmutableList.copyOf(nonEquiJoinPredicateElements); - this.equiJoinPredicateElements = ImmutableList.copyOf(equiJoinPredicateElements); - this.projsFromLeftPartOfJoinKeysInChildSchema = ImmutableSet - .copyOf(projsFromLeftPartOfJoinKeysInChildSchema); - this.projsFromRightPartOfJoinKeysInChildSchema = ImmutableSet - .copyOf(projsFromRightPartOfJoinKeysInChildSchema); - this.projsFromRightPartOfJoinKeysInJoinSchema = ImmutableSet - .copyOf(projsFromRightPartOfJoinKeysInJoinSchema); - this.mapOfProjIndxInJoinSchemaToLeafPInfo = ImmutableMap - .copyOf(mapOfProjIndxInJoinSchemaToLeafPInfo); - } - - public List getNonEquiJoinPredicateElements() { - return this.nonEquiJoinPredicateElements; - } - - public List getEquiJoinPredicateElements() { - return this.equiJoinPredicateElements; - } - - public Set getProjsFromLeftPartOfJoinKeysInChildSchema() { - return this.projsFromLeftPartOfJoinKeysInChildSchema; - } - - public Set getProjsFromRightPartOfJoinKeysInChildSchema() { - return this.projsFromRightPartOfJoinKeysInChildSchema; - } - - /** - * NOTE: Join Schema = left Schema + (right Schema offset by - * left.fieldcount). Hence its ok to return projections from left in child - * schema. - */ - public Set getProjsFromLeftPartOfJoinKeysInJoinSchema() { - return this.projsFromLeftPartOfJoinKeysInChildSchema; - } - - public Set getProjsFromRightPartOfJoinKeysInJoinSchema() { - return this.projsFromRightPartOfJoinKeysInJoinSchema; - } - - public Map> getMapOfProjIndxToLeafPInfo() { - return this.mapOfProjIndxInJoinSchemaToLeafPInfo; - } - - public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoinRel j) { - return constructJoinPredicateInfo(j, j.getCondition()); - } - - public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoinRel j, RexNode predicate) { - JoinPredicateInfo jpi = null; - JoinLeafPredicateInfo jlpi = null; - List equiLPIList = new ArrayList(); - List nonEquiLPIList = new ArrayList(); - Set projsFromLeftPartOfJoinKeys = new HashSet(); - Set projsFromRightPartOfJoinKeys = new HashSet(); - Set projsFromRightPartOfJoinKeysInJoinSchema = new HashSet(); - Map> tmpMapOfProjIndxInJoinSchemaToLeafPInfo = new HashMap>(); - Map> mapOfProjIndxInJoinSchemaToLeafPInfo = new HashMap>(); - List tmpJLPILst = null; - int rightOffSet = j.getLeft().getRowType().getFieldCount(); - int projIndxInJoin; - List conjuctiveElements; - - // 1. Decompose Join condition to a number of leaf predicates - // (conjuctive elements) - conjuctiveElements = RelOptUtil.conjunctions(predicate); - - // 2. 
Walk through leaf predicates building up JoinLeafPredicateInfo - for (RexNode ce : conjuctiveElements) { - // 2.1 Construct JoinLeafPredicateInfo - jlpi = JoinLeafPredicateInfo.constructJoinLeafPredicateInfo(j, ce); - - // 2.2 Classify leaf predicate as Equi vs Non Equi - if (jlpi.comparisonType.equals(SqlKind.EQUALS)) { - equiLPIList.add(jlpi); - } else { - nonEquiLPIList.add(jlpi); - } - - // 2.3 Maintain join keys coming from left vs right (in child & - // Join Schema) - projsFromLeftPartOfJoinKeys.addAll(jlpi.getProjsFromLeftPartOfJoinKeysInChildSchema()); - projsFromRightPartOfJoinKeys.addAll(jlpi.getProjsFromRightPartOfJoinKeysInChildSchema()); - projsFromRightPartOfJoinKeysInJoinSchema.addAll(jlpi - .getProjsFromRightPartOfJoinKeysInJoinSchema()); - - // 2.4 Update Join Key to JoinLeafPredicateInfo map with keys - // from left - for (Integer projIndx : jlpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) { - tmpJLPILst = tmpMapOfProjIndxInJoinSchemaToLeafPInfo.get(projIndx); - if (tmpJLPILst == null) - tmpJLPILst = new ArrayList(); - tmpJLPILst.add(jlpi); - tmpMapOfProjIndxInJoinSchemaToLeafPInfo.put(projIndx, tmpJLPILst); - } - - // 2.5 Update Join Key to JoinLeafPredicateInfo map with keys - // from right - for (Integer projIndx : jlpi.getProjsFromRightPartOfJoinKeysInChildSchema()) { - projIndxInJoin = projIndx + rightOffSet; - tmpJLPILst = tmpMapOfProjIndxInJoinSchemaToLeafPInfo.get(projIndxInJoin); - if (tmpJLPILst == null) - tmpJLPILst = new ArrayList(); - tmpJLPILst.add(jlpi); - tmpMapOfProjIndxInJoinSchemaToLeafPInfo.put(projIndxInJoin, tmpJLPILst); - } - - } - - // 3. Update Update Join Key to List to use - // ImmutableList - for (Entry> e : tmpMapOfProjIndxInJoinSchemaToLeafPInfo - .entrySet()) { - mapOfProjIndxInJoinSchemaToLeafPInfo.put(e.getKey(), ImmutableList.copyOf(e.getValue())); - } - - // 4. Construct JoinPredicateInfo - jpi = new JoinPredicateInfo(nonEquiLPIList, equiLPIList, projsFromLeftPartOfJoinKeys, - projsFromRightPartOfJoinKeys, projsFromRightPartOfJoinKeysInJoinSchema, - mapOfProjIndxInJoinSchemaToLeafPInfo); - return jpi; - } - } - - /** - * JoinLeafPredicateInfo represents leaf predicate in Join condition - * (conjuctive lement).
- *

- * JoinLeafPredicateInfo:
- * 1. Stores list of expressions from left and right child which is part of - * equi join keys.
- * 2. Stores set of projection indexes from left and right child which is part - * of equi join keys; the indexes are both in child and Join node schema.
- */ - public static class JoinLeafPredicateInfo { - private final SqlKind comparisonType; - private final ImmutableList joinKeyExprsFromLeft; - private final ImmutableList joinKeyExprsFromRight; - private final ImmutableSet projsFromLeftPartOfJoinKeysInChildSchema; - private final ImmutableSet projsFromRightPartOfJoinKeysInChildSchema; - private final ImmutableSet projsFromRightPartOfJoinKeysInJoinSchema; - - public JoinLeafPredicateInfo(SqlKind comparisonType, List joinKeyExprsFromLeft, - List joinKeyExprsFromRight, Set projsFromLeftPartOfJoinKeysInChildSchema, - Set projsFromRightPartOfJoinKeysInChildSchema, - Set projsFromRightPartOfJoinKeysInJoinSchema) { - this.comparisonType = comparisonType; - this.joinKeyExprsFromLeft = ImmutableList.copyOf(joinKeyExprsFromLeft); - this.joinKeyExprsFromRight = ImmutableList.copyOf(joinKeyExprsFromRight); - this.projsFromLeftPartOfJoinKeysInChildSchema = ImmutableSet - .copyOf(projsFromLeftPartOfJoinKeysInChildSchema); - this.projsFromRightPartOfJoinKeysInChildSchema = ImmutableSet - .copyOf(projsFromRightPartOfJoinKeysInChildSchema); - this.projsFromRightPartOfJoinKeysInJoinSchema = ImmutableSet - .copyOf(projsFromRightPartOfJoinKeysInJoinSchema); - } - - public List getJoinKeyExprsFromLeft() { - return this.joinKeyExprsFromLeft; - } - - public List getJoinKeyExprsFromRight() { - return this.joinKeyExprsFromRight; - } - - public Set getProjsFromLeftPartOfJoinKeysInChildSchema() { - return this.projsFromLeftPartOfJoinKeysInChildSchema; - } - - /** - * NOTE: Join Schema = left Schema + (right Schema offset by - * left.fieldcount). Hence its ok to return projections from left in child - * schema. - */ - public Set getProjsFromLeftPartOfJoinKeysInJoinSchema() { - return this.projsFromLeftPartOfJoinKeysInChildSchema; - } - - public Set getProjsFromRightPartOfJoinKeysInChildSchema() { - return this.projsFromRightPartOfJoinKeysInChildSchema; - } - - public Set getProjsFromRightPartOfJoinKeysInJoinSchema() { - return this.projsFromRightPartOfJoinKeysInJoinSchema; - } - - private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(HiveJoinRel j, RexNode pe) { - JoinLeafPredicateInfo jlpi = null; - List filterNulls = new ArrayList(); - List joinKeyExprsFromLeft = new ArrayList(); - List joinKeyExprsFromRight = new ArrayList(); - Set projsFromLeftPartOfJoinKeysInChildSchema = new HashSet(); - Set projsFromRightPartOfJoinKeysInChildSchema = new HashSet(); - Set projsFromRightPartOfJoinKeysInJoinSchema = new HashSet(); - int rightOffSet = j.getLeft().getRowType().getFieldCount(); - - // 1. Split leaf join predicate to expressions from left, right - RelOptUtil.splitJoinCondition(j.getSystemFieldList(), j.getLeft(), j.getRight(), pe, - joinKeyExprsFromLeft, joinKeyExprsFromRight, filterNulls, null); - - // 2. For left expressions, collect child projection indexes used - InputReferencedVisitor irvLeft = new InputReferencedVisitor(); - irvLeft.apply(joinKeyExprsFromLeft); - projsFromLeftPartOfJoinKeysInChildSchema.addAll(irvLeft.inputPosReferenced); - - // 3. For right expressions, collect child projection indexes used - InputReferencedVisitor irvRight = new InputReferencedVisitor(); - irvRight.apply(joinKeyExprsFromRight); - projsFromRightPartOfJoinKeysInChildSchema.addAll(irvRight.inputPosReferenced); - - // 3. Translate projection indexes from right to join schema, by adding - // offset. - for (Integer indx : projsFromRightPartOfJoinKeysInChildSchema) { - projsFromRightPartOfJoinKeysInJoinSchema.add(indx + rightOffSet); - } - - // 4. 
Construct JoinLeafPredicateInfo - jlpi = new JoinLeafPredicateInfo(pe.getKind(), joinKeyExprsFromLeft, joinKeyExprsFromRight, - projsFromLeftPartOfJoinKeysInChildSchema, projsFromRightPartOfJoinKeysInChildSchema, - projsFromRightPartOfJoinKeysInJoinSchema); - - return jlpi; - } - } - - public static boolean limitRelNode(RelNode rel) { - if ((rel instanceof SortRel) && ((SortRel) rel).getCollation().getFieldCollations().isEmpty()) - return true; - - return false; - } - - public static boolean orderRelNode(RelNode rel) { - if ((rel instanceof SortRel) && !((SortRel) rel).getCollation().getFieldCollations().isEmpty()) - return true; - - return false; - } - - /** - * Get top level select starting from root. Assumption here is root can only - * be SortRel & ProjectRel. Also the top project should be at most 2 levels - * below Sortrel; i.e SortRel(Limit)-SortRel(OB)-Select - * - * @param rootRel - * @return - */ - public static Pair getTopLevelSelect(final RelNode rootRel) { - RelNode tmpRel = rootRel; - RelNode parentOforiginalProjRel = rootRel; - HiveProjectRel originalProjRel = null; - - while (tmpRel != null) { - if (tmpRel instanceof HiveProjectRel) { - originalProjRel = (HiveProjectRel) tmpRel; - break; - } - parentOforiginalProjRel = tmpRel; - tmpRel = tmpRel.getInput(0); - } - - return (new Pair(parentOforiginalProjRel, originalProjRel)); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveTypeSystemImpl.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveTypeSystemImpl.java deleted file mode 100644 index 1bc5a2c..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveTypeSystemImpl.java +++ /dev/null @@ -1,101 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq; - -import org.eigenbase.reltype.RelDataTypeSystemImpl; -import org.eigenbase.sql.type.SqlTypeName; - -public class HiveTypeSystemImpl extends RelDataTypeSystemImpl { - // TODO: This should come from type system; Currently there is no definition - // in type system for this. 
- private static final int MAX_DECIMAL_PRECISION = 38; - private static final int MAX_DECIMAL_SCALE = 38; - private static final int DEFAULT_DECIMAL_PRECISION = 10; - private static final int MAX_VARCHAR_PRECISION = 65535; - private static final int MAX_CHAR_PRECISION = 255; - private static final int MAX_BINARY_PRECISION = Integer.MAX_VALUE; - private static final int MAX_TIMESTAMP_PRECISION = 9; - - @Override - public int getMaxScale(SqlTypeName typeName) { - switch (typeName) { - case DECIMAL: - return getMaxNumericScale(); - case INTERVAL_DAY_TIME: - case INTERVAL_YEAR_MONTH: - return SqlTypeName.MAX_INTERVAL_FRACTIONAL_SECOND_PRECISION; - default: - return -1; - } - } - - @Override - public int getDefaultPrecision(SqlTypeName typeName) { - switch (typeName) { - // Hive will always require user to specify exact sizes for char, varchar; - // Binary doesn't need any sizes; Decimal has the default of 10. - case CHAR: - case VARCHAR: - case BINARY: - case VARBINARY: - case TIME: - case TIMESTAMP: - return getMaxPrecision(typeName); - case DECIMAL: - return DEFAULT_DECIMAL_PRECISION; - case INTERVAL_DAY_TIME: - case INTERVAL_YEAR_MONTH: - return SqlTypeName.DEFAULT_INTERVAL_START_PRECISION; - default: - return -1; - } - } - - @Override - public int getMaxPrecision(SqlTypeName typeName) { - switch (typeName) { - case DECIMAL: - return getMaxNumericPrecision(); - case VARCHAR: - return MAX_VARCHAR_PRECISION; - case CHAR: - return MAX_CHAR_PRECISION; - case VARBINARY: - case BINARY: - return MAX_BINARY_PRECISION; - case TIME: - case TIMESTAMP: - return MAX_TIMESTAMP_PRECISION; - case INTERVAL_DAY_TIME: - case INTERVAL_YEAR_MONTH: - return SqlTypeName.MAX_INTERVAL_START_PRECISION; - default: - return -1; - } - } - - @Override - public int getMaxNumericScale() { - return MAX_DECIMAL_SCALE; - } - - @Override - public int getMaxNumericPrecision() { - return MAX_DECIMAL_PRECISION; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/OptiqSemanticException.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/OptiqSemanticException.java deleted file mode 100644 index d2b08fa..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/OptiqSemanticException.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.optimizer.optiq; - -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.parse.SemanticException; - -/** - * Exception from SemanticAnalyzer. 
- */ - -public class OptiqSemanticException extends SemanticException { - - private static final long serialVersionUID = 1L; - - public OptiqSemanticException() { - super(); - } - - public OptiqSemanticException(String message) { - super(message); - } - - public OptiqSemanticException(Throwable cause) { - super(cause); - } - - public OptiqSemanticException(String message, Throwable cause) { - super(message, cause); - } - - public OptiqSemanticException(ErrorMsg errorMsg, String... msgArgs) { - super(errorMsg, msgArgs); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java deleted file mode 100644 index 080d27f..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java +++ /dev/null @@ -1,355 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq; - -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ExprNodeConverter; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; -import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; -import org.apache.hadoop.hive.ql.plan.ColStatistics; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.Statistics; -import org.apache.hadoop.hive.ql.stats.StatsUtils; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.TableAccessRel; -import org.eigenbase.relopt.RelOptAbstractTable; -import org.eigenbase.relopt.RelOptSchema; -import org.eigenbase.relopt.RelOptUtil.InputFinder; -import org.eigenbase.reltype.RelDataType; -import org.eigenbase.rex.RexNode; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableMap.Builder; - -public class RelOptHiveTable extends RelOptAbstractTable { - private final Table hiveTblMetadata; - private final String tblAlias; - private final ImmutableList hiveNonPartitionCols; - private final ImmutableMap hiveNonPartitionColsMap; - private final ImmutableMap hivePartitionColsMap; - private final int noOfProjs; - 
final HiveConf hiveConf; - - private double rowCount = -1; - Map hiveColStatsMap = new HashMap(); - PrunedPartitionList partitionList; - Map partitionCache; - AtomicInteger noColsMissingStats; - - protected static final Log LOG = LogFactory - .getLog(RelOptHiveTable.class - .getName()); - - public RelOptHiveTable(RelOptSchema optiqSchema, String qualifiedTblName, String tblAlias, RelDataType rowType, - Table hiveTblMetadata, List hiveNonPartitionCols, - List hivePartitionCols, HiveConf hconf, Map partitionCache, AtomicInteger noColsMissingStats) { - super(optiqSchema, qualifiedTblName, rowType); - this.hiveTblMetadata = hiveTblMetadata; - this.tblAlias = tblAlias; - this.hiveNonPartitionCols = ImmutableList.copyOf(hiveNonPartitionCols); - this.hiveNonPartitionColsMap = getColInfoMap(hiveNonPartitionCols, 0); - this.hivePartitionColsMap = getColInfoMap(hivePartitionCols, hiveNonPartitionColsMap.size()); - this.noOfProjs = hiveNonPartitionCols.size() + hivePartitionCols.size(); - this.hiveConf = hconf; - this.partitionCache = partitionCache; - this.noColsMissingStats = noColsMissingStats; - } - - private static ImmutableMap getColInfoMap(List hiveCols, - int startIndx) { - Builder bldr = ImmutableMap. builder(); - - int indx = startIndx; - for (ColumnInfo ci : hiveCols) { - bldr.put(indx, ci); - indx++; - } - - return bldr.build(); - } - - @Override - public boolean isKey(BitSet arg0) { - return false; - } - - @Override - public RelNode toRel(ToRelContext context) { - return new TableAccessRel(context.getCluster(), this); - } - - @Override - public T unwrap(Class arg0) { - return arg0.isInstance(this) ? arg0.cast(this) : null; - } - - @Override - public double getRowCount() { - if (rowCount == -1) { - if (null == partitionList) { - // we are here either unpartitioned table or partitioned table with no predicates - computePartitionList(hiveConf, null); - } - if (hiveTblMetadata.isPartitioned()) { - List rowCounts = StatsUtils.getBasicStatForPartitions( - hiveTblMetadata, partitionList.getNotDeniedPartns(), - StatsSetupConst.ROW_COUNT); - rowCount = StatsUtils.getSumIgnoreNegatives(rowCounts); - - } else { - rowCount = StatsUtils.getNumRows(hiveTblMetadata); - } - } - - if (rowCount == -1) - noColsMissingStats.getAndIncrement(); - - return rowCount; - } - - public Table getHiveTableMD() { - return hiveTblMetadata; - } - - public String getTableAlias() { - // NOTE: Optiq considers tbls to be equal if their names are the same. Hence - // we need to provide Optiq the fully qualified table name (dbname.tblname) - // and not the user provided aliases. - // However in HIVE DB name can not appear in select list; in case of join - // where table names differ only in DB name, Hive would require user - // introducing explicit aliases for tbl. - if (tblAlias == null) - return hiveTblMetadata.getTableName(); - else - return tblAlias; - } - - private String getColNamesForLogging(Set colLst) { - StringBuffer sb = new StringBuffer(); - boolean firstEntry = true; - for (String colName : colLst) { - if (firstEntry) { - sb.append(colName); - firstEntry = false; - } else { - sb.append(", " + colName); - } - } - return sb.toString(); - } - - public void computePartitionList(HiveConf conf, RexNode pruneNode) { - - try { - if (!hiveTblMetadata.isPartitioned() || pruneNode == null || InputFinder.bits(pruneNode).length() == 0 ) { - // there is no predicate on partitioning column, we need all partitions in this case. 
- partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf, getName(), partitionCache); - return; - } - - // We have valid pruning expressions, only retrieve qualifying partitions - ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true)); - - partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(), partitionCache); - } catch (HiveException he) { - throw new RuntimeException(he); - } - } - - private void updateColStats(Set projIndxLst) { - List nonPartColNamesThatRqrStats = new ArrayList(); - List nonPartColIndxsThatRqrStats = new ArrayList(); - List partColNamesThatRqrStats = new ArrayList(); - List partColIndxsThatRqrStats = new ArrayList(); - Set colNamesFailedStats = new HashSet(); - - // 1. Separate required columns to Non Partition and Partition Cols - ColumnInfo tmp; - for (Integer pi : projIndxLst) { - if (hiveColStatsMap.get(pi) == null) { - if ((tmp = hiveNonPartitionColsMap.get(pi)) != null) { - nonPartColNamesThatRqrStats.add(tmp.getInternalName()); - nonPartColIndxsThatRqrStats.add(pi); - } else if ((tmp = hivePartitionColsMap.get(pi)) != null) { - partColNamesThatRqrStats.add(tmp.getInternalName()); - partColIndxsThatRqrStats.add(pi); - } else { - noColsMissingStats.getAndIncrement(); - String logMsg = "Unable to find Column Index: " + pi + ", in " - + hiveTblMetadata.getCompleteName(); - LOG.error(logMsg); - throw new RuntimeException(logMsg); - } - } - } - - if (null == partitionList) { - // We could be here either because its an unpartitioned table or because - // there are no pruning predicates on a partitioned table. - computePartitionList(hiveConf, null); - } - - // 2. Obtain Col Stats for Non Partition Cols - if (nonPartColNamesThatRqrStats.size() > 0) { - List hiveColStats; - - if (!hiveTblMetadata.isPartitioned()) { - // 2.1 Handle the case for unpartitioned table. - hiveColStats = StatsUtils.getTableColumnStats(hiveTblMetadata, hiveNonPartitionCols, - nonPartColNamesThatRqrStats); - - // 2.1.1 Record Column Names that we needed stats for but couldn't - if (hiveColStats == null) { - colNamesFailedStats.addAll(nonPartColNamesThatRqrStats); - } else if (hiveColStats.size() != nonPartColNamesThatRqrStats.size()) { - Set setOfFiledCols = new HashSet(nonPartColNamesThatRqrStats); - - Set setOfObtainedColStats = new HashSet(); - for (ColStatistics cs : hiveColStats) { - setOfObtainedColStats.add(cs.getColumnName()); - } - setOfFiledCols.removeAll(setOfObtainedColStats); - - colNamesFailedStats.addAll(setOfFiledCols); - } - } else { - // 2.2 Obtain col stats for partitioned table. 
- try { - if (partitionList.getNotDeniedPartns().isEmpty()) { - // no need to make a metastore call - rowCount = 0; - hiveColStats = new ArrayList(); - for (String c : nonPartColNamesThatRqrStats) { - // add empty stats object for each column - hiveColStats.add(new ColStatistics(hiveTblMetadata.getTableName(), c, null)); - } - colNamesFailedStats.clear(); - } else { - Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList, - hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, - nonPartColNamesThatRqrStats, true, true); - rowCount = stats.getNumRows(); - hiveColStats = new ArrayList(); - for (String c : nonPartColNamesThatRqrStats) { - ColStatistics cs = stats.getColumnStatisticsFromColName(c); - if (cs != null) { - hiveColStats.add(cs); - } else { - colNamesFailedStats.add(c); - } - } - } - } catch (HiveException e) { - String logMsg = "Collecting stats failed."; - LOG.error(logMsg); - throw new RuntimeException(logMsg); - } - } - - if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) { - for (int i = 0; i < hiveColStats.size(); i++) { - hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), hiveColStats.get(i)); - } - } - } - - // 3. Obtain Stats for Partition Cols - if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) { - ColStatistics cStats = null; - for (int i = 0; i < partColNamesThatRqrStats.size(); i++) { - cStats = new ColStatistics(hiveTblMetadata.getTableName(), - partColNamesThatRqrStats.get(i), hivePartitionColsMap.get( - partColIndxsThatRqrStats.get(i)).getTypeName()); - cStats.setCountDistint(getDistinctCount(partitionList.getPartitions(),partColNamesThatRqrStats.get(i))); - hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats); - } - } - - // 4. Warn user if we could get stats for required columns - if (!colNamesFailedStats.isEmpty()) { - String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", Columns: " - + getColNamesForLogging(colNamesFailedStats); - LOG.error(logMsg); - noColsMissingStats.getAndAdd(colNamesFailedStats.size()); - throw new RuntimeException(logMsg); - } - } - - private int getDistinctCount(Set partitions, String partColName) { - Set distinctVals = new HashSet(partitions.size()); - for (Partition partition : partitions) { - distinctVals.add(partition.getSpec().get(partColName)); - } - return distinctVals.size(); - } - - public List getColStat(List projIndxLst) { - ImmutableList.Builder colStatsBldr = ImmutableList. builder(); - - if (projIndxLst != null) { - updateColStats(new HashSet(projIndxLst)); - for (Integer i : projIndxLst) { - colStatsBldr.add(hiveColStatsMap.get(i)); - } - } else { - List pILst = new ArrayList(); - for (Integer i = 0; i < noOfProjs; i++) { - pILst.add(i); - } - updateColStats(new HashSet(pILst)); - for (Integer pi : pILst) { - colStatsBldr.add(hiveColStatsMap.get(pi)); - } - } - - return colStatsBldr.build(); - } - - /* - * use to check if a set of columns are all partition columns. - * true only if: - * - all columns in BitSet are partition - * columns. 
- */ - public boolean containsPartitionColumnsOnly(BitSet cols) { - - for (int i = cols.nextSetBit(0); i >= 0; i++, i = cols.nextSetBit(i + 1)) { - if (!hivePartitionColsMap.containsKey(i)) { - return false; - } - } - return true; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java deleted file mode 100644 index 4b44a28..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.optimizer.optiq; - - -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel; -import org.eigenbase.rel.RelCollation; -import org.eigenbase.rel.RelCollationImpl; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.relopt.RelTraitSet; - -public class TraitsUtil { - public static RelTraitSet getSortTraitSet(RelOptCluster cluster, RelTraitSet traitSet, - RelCollation collation) { - return traitSet.plus(collation); - } - - public static RelTraitSet getDefaultTraitSet(RelOptCluster cluster) { - return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCost.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCost.java deleted file mode 100644 index 72fe5d6..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCost.java +++ /dev/null @@ -1,212 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.cost; - -import org.eigenbase.relopt.RelOptCost; -import org.eigenbase.relopt.RelOptCostFactory; -import org.eigenbase.relopt.RelOptUtil; - -// TODO: This should inherit from VolcanoCost and should just override isLE method. 
-public class HiveCost implements RelOptCost { - // ~ Static fields/initializers --------------------------------------------- - - public static final HiveCost INFINITY = new HiveCost(Double.POSITIVE_INFINITY, - Double.POSITIVE_INFINITY, - Double.POSITIVE_INFINITY) { - @Override - public String toString() { - return "{inf}"; - } - }; - - public static final HiveCost HUGE = new HiveCost(Double.MAX_VALUE, Double.MAX_VALUE, - Double.MAX_VALUE) { - @Override - public String toString() { - return "{huge}"; - } - }; - - public static final HiveCost ZERO = new HiveCost(0.0, 0.0, 0.0) { - @Override - public String toString() { - return "{0}"; - } - }; - - public static final HiveCost TINY = new HiveCost(1.0, 1.0, 0.0) { - @Override - public String toString() { - return "{tiny}"; - } - }; - - public static final RelOptCostFactory FACTORY = new Factory(); - - // ~ Instance fields -------------------------------------------------------- - - final double cpu; - final double io; - final double rowCount; - - // ~ Constructors ----------------------------------------------------------- - - HiveCost(double rowCount, double cpu, double io) { - assert rowCount >= 0d; - assert cpu >= 0d; - assert io >= 0d; - this.rowCount = rowCount; - this.cpu = cpu; - this.io = io; - } - - // ~ Methods ---------------------------------------------------------------- - - public double getCpu() { - return cpu; - } - - public boolean isInfinite() { - return (this == INFINITY) || (this.rowCount == Double.POSITIVE_INFINITY) - || (this.cpu == Double.POSITIVE_INFINITY) || (this.io == Double.POSITIVE_INFINITY); - } - - public double getIo() { - return io; - } - - // TODO: If two cost is equal, could we do any better than comparing - // cardinality (may be some other heuristics to break the tie) - public boolean isLe(RelOptCost other) { - return this == other || this.rowCount <= other.getRows(); - /* - * if (((this.dCpu + this.dIo) < (other.getCpu() + other.getIo())) || - * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()) && this.dRows - * <= other.getRows())) { return true; } else { return false; } - */ - } - - public boolean isLt(RelOptCost other) { - return this.rowCount < other.getRows(); - /* - * return isLe(other) && !equals(other); - */ - } - - public double getRows() { - return rowCount; - } - - public boolean equals(RelOptCost other) { - return (this == other) || ((this.rowCount) == (other.getRows())); - - /* - * //TODO: should we consider cardinality as well? return (this == other) || - * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo())); - */ - } - - public boolean isEqWithEpsilon(RelOptCost other) { - return (this == other) || (Math.abs((this.rowCount) - (other.getRows())) < RelOptUtil.EPSILON); - // Turn this one once we do the Algorithm selection in CBO - /* - * return (this == other) || (Math.abs((this.dCpu + this.dIo) - - * (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON); - */ - } - - public RelOptCost minus(RelOptCost other) { - if (this == INFINITY) { - return this; - } - - return new HiveCost(this.rowCount - other.getRows(), this.cpu - other.getCpu(), this.io - - other.getIo()); - } - - public RelOptCost multiplyBy(double factor) { - if (this == INFINITY) { - return this; - } - return new HiveCost(rowCount * factor, cpu * factor, io * factor); - } - - public double divideBy(RelOptCost cost) { - // Compute the geometric average of the ratios of all of the factors - // which are non-zero and finite. 
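    // Editorial worked example (not in the original patch): dividing
    // {8 rows, 2 cpu, 0 io} by {2 rows, 4 cpu, 0 io} skips the io factor,
    // which is zero on both sides, leaving d = (8/2) * (2/4) = 2 over
    // n = 2 usable factors, so the method returns Math.pow(2, 1/2.0) ~= 1.41.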
- double d = 1; - double n = 0; - if ((this.rowCount != 0) && !Double.isInfinite(this.rowCount) && (cost.getRows() != 0) - && !Double.isInfinite(cost.getRows())) { - d *= this.rowCount / cost.getRows(); - ++n; - } - if ((this.cpu != 0) && !Double.isInfinite(this.cpu) && (cost.getCpu() != 0) - && !Double.isInfinite(cost.getCpu())) { - d *= this.cpu / cost.getCpu(); - ++n; - } - if ((this.io != 0) && !Double.isInfinite(this.io) && (cost.getIo() != 0) - && !Double.isInfinite(cost.getIo())) { - d *= this.io / cost.getIo(); - ++n; - } - if (n == 0) { - return 1.0; - } - return Math.pow(d, 1 / n); - } - - public RelOptCost plus(RelOptCost other) { - if ((this == INFINITY) || (other.isInfinite())) { - return INFINITY; - } - return new HiveCost(this.rowCount + other.getRows(), this.cpu + other.getCpu(), this.io - + other.getIo()); - } - - @Override - public String toString() { - return "{" + rowCount + " rows, " + cpu + " cpu, " + io + " io}"; - } - - private static class Factory implements RelOptCostFactory { - private Factory() { - } - - public RelOptCost makeCost(double rowCount, double cpu, double io) { - return new HiveCost(rowCount, cpu, io); - } - - public RelOptCost makeHugeCost() { - return HUGE; - } - - public HiveCost makeInfiniteCost() { - return INFINITY; - } - - public HiveCost makeTinyCost() { - return TINY; - } - - public HiveCost makeZeroCost() { - return ZERO; - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCostUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCostUtil.java deleted file mode 100644 index 7436f12..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCostUtil.java +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.cost; - -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; -import org.eigenbase.relopt.RelOptCost; - -// Use this once we have Join Algorithm selection -public class HiveCostUtil { - private static final double cpuCostInNanoSec = 1.0; - private static final double netCostInNanoSec = 150 * cpuCostInNanoSec; - private static final double localFSWriteCostInNanoSec = 4 * netCostInNanoSec; - private static final double localFSReadCostInNanoSec = 4 * netCostInNanoSec; - private static final double hDFSWriteCostInNanoSec = 10 * localFSWriteCostInNanoSec; - @SuppressWarnings("unused") -//Use this once we have Join Algorithm selection - private static final double hDFSReadCostInNanoSec = 1.5 * localFSReadCostInNanoSec; - - public static RelOptCost computCardinalityBasedCost(HiveRel hr) { - return new HiveCost(hr.getRows(), 0, 0); - } - - public static HiveCost computeCost(HiveTableScanRel t) { - double cardinality = t.getRows(); - return new HiveCost(cardinality, 0, hDFSWriteCostInNanoSec * cardinality * 0); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java deleted file mode 100644 index 5deb801..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.optimizer.optiq.cost; - -import org.eigenbase.rel.RelCollationTraitDef; -import org.eigenbase.relopt.ConventionTraitDef; -import org.eigenbase.relopt.RelOptPlanner; -import org.eigenbase.relopt.volcano.VolcanoPlanner; - -/** - * Refinement of {@link org.eigenbase.relopt.volcano.VolcanoPlanner} for Hive. - * - *
<p>
- * It uses {@link org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost} as - * its cost model. - */ -public class HiveVolcanoPlanner extends VolcanoPlanner { - private static final boolean ENABLE_COLLATION_TRAIT = true; - - /** Creates a HiveVolcanoPlanner. */ - public HiveVolcanoPlanner() { - super(HiveCost.FACTORY, null); - } - - public static RelOptPlanner createPlanner() { - final VolcanoPlanner planner = new HiveVolcanoPlanner(); - planner.addRelTraitDef(ConventionTraitDef.INSTANCE); - if (ENABLE_COLLATION_TRAIT) { - planner.addRelTraitDef(RelCollationTraitDef.INSTANCE); - } - return planner; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveAggregateRel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveAggregateRel.java deleted file mode 100644 index fc19895..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveAggregateRel.java +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; - -import java.util.BitSet; -import java.util.List; - -import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost; -import org.eigenbase.rel.AggregateCall; -import org.eigenbase.rel.AggregateRelBase; -import org.eigenbase.rel.InvalidRelException; -import org.eigenbase.rel.RelFactories.AggregateFactory; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.metadata.RelMetadataQuery; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.relopt.RelOptCost; -import org.eigenbase.relopt.RelOptPlanner; -import org.eigenbase.relopt.RelTraitSet; - -public class HiveAggregateRel extends AggregateRelBase implements HiveRel { - - public static final HiveAggRelFactory HIVE_AGGR_REL_FACTORY = new HiveAggRelFactory(); - - public HiveAggregateRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, - BitSet groupSet, List aggCalls) throws InvalidRelException { - super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, groupSet, aggCalls); - } - - @Override - public AggregateRelBase copy(RelTraitSet traitSet, RelNode input, BitSet groupSet, - List aggCalls) { - try { - return new HiveAggregateRel(getCluster(), traitSet, input, groupSet, aggCalls); - } catch (InvalidRelException e) { - // Semantic error not possible. Must be a bug. Convert to - // internal error. 
- throw new AssertionError(e); - } - } - - @Override - public void implement(Implementor implementor) { - } - - @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); - } - - @Override - public double getRows() { - return RelMetadataQuery.getDistinctRowCount(this, groupSet, getCluster().getRexBuilder() - .makeLiteral(true)); - } - - private static class HiveAggRelFactory implements AggregateFactory { - - @Override - public RelNode createAggregate(RelNode child, BitSet groupSet, - List aggCalls) { - try { - return new HiveAggregateRel(child.getCluster(), child.getTraitSet(), child, groupSet, aggCalls); - } catch (InvalidRelException e) { - throw new RuntimeException(e); - } - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveFilterRel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveFilterRel.java deleted file mode 100644 index 8b85046..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveFilterRel.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; - -import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost; -import org.eigenbase.rel.FilterRelBase; -import org.eigenbase.rel.RelFactories.FilterFactory; -import org.eigenbase.rel.RelNode; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.relopt.RelOptCost; -import org.eigenbase.relopt.RelOptPlanner; -import org.eigenbase.relopt.RelTraitSet; -import org.eigenbase.rex.RexNode; - -public class HiveFilterRel extends FilterRelBase implements HiveRel { - - public static final FilterFactory DEFAULT_FILTER_FACTORY = new HiveFilterFactoryImpl(); - - public HiveFilterRel(RelOptCluster cluster, RelTraitSet traits, RelNode child, RexNode condition) { - super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition); - } - - @Override - public FilterRelBase copy(RelTraitSet traitSet, RelNode input, RexNode condition) { - assert traitSet.containsIfApplicable(HiveRel.CONVENTION); - return new HiveFilterRel(getCluster(), traitSet, input, getCondition()); - } - - @Override - public void implement(Implementor implementor) { - } - - @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); - } - - /** - * Implementation of {@link FilterFactory} that returns - * {@link org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel} - * . 
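 * (Editorial note, not part of the patch: planner rules plug this factory in
 * so that rewritten plans keep producing Hive operators; for instance,
 * HivePushFilterPastJoinRule below passes HiveFilterRel.DEFAULT_FILTER_FACTORY.)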
- */ - private static class HiveFilterFactoryImpl implements FilterFactory { - @Override - public RelNode createFilter(RelNode child, RexNode condition) { - RelOptCluster cluster = child.getCluster(); - HiveFilterRel filter = new HiveFilterRel(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition); - return filter; - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java deleted file mode 100644 index 3d6aa84..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java +++ /dev/null @@ -1,157 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; - -import java.util.Collections; -import java.util.Set; - -import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost; -import org.eigenbase.rel.InvalidRelException; -import org.eigenbase.rel.JoinRelBase; -import org.eigenbase.rel.JoinRelType; -import org.eigenbase.rel.RelFactories.JoinFactory; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.metadata.RelMetadataQuery; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.relopt.RelOptCost; -import org.eigenbase.relopt.RelOptPlanner; -import org.eigenbase.relopt.RelTraitSet; -import org.eigenbase.reltype.RelDataType; -import org.eigenbase.reltype.RelDataTypeField; -import org.eigenbase.rex.RexNode; - -//TODO: Should we convert MultiJoin to be a child of HiveJoinRelBase -public class HiveJoinRel extends JoinRelBase implements HiveRel { - // NOTE: COMMON_JOIN & SMB_JOIN are Sort Merge Join (in case of COMMON_JOIN - // each parallel computation handles multiple splits where as in case of SMB - // each parallel computation handles one bucket). 
MAP_JOIN and BUCKET_JOIN is - // hash joins where MAP_JOIN keeps the whole data set of non streaming tables - // in memory where as BUCKET_JOIN keeps only the b - public enum JoinAlgorithm { - NONE, COMMON_JOIN, MAP_JOIN, BUCKET_JOIN, SMB_JOIN - } - - public enum MapJoinStreamingRelation { - NONE, LEFT_RELATION, RIGHT_RELATION - } - - public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl(); - - private final boolean leftSemiJoin; - private final JoinAlgorithm joinAlgorithm; - //This will be used once we do Join Algorithm selection - @SuppressWarnings("unused") - private final MapJoinStreamingRelation mapJoinStreamingSide = MapJoinStreamingRelation.NONE; - - public static HiveJoinRel getJoin(RelOptCluster cluster, RelNode left, RelNode right, - RexNode condition, JoinRelType joinType, boolean leftSemiJoin) { - try { - Set variablesStopped = Collections.emptySet(); - return new HiveJoinRel(cluster, null, left, right, condition, joinType, variablesStopped, - JoinAlgorithm.NONE, null, leftSemiJoin); - } catch (InvalidRelException e) { - throw new RuntimeException(e); - } - } - - protected HiveJoinRel(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right, - RexNode condition, JoinRelType joinType, Set variablesStopped, - JoinAlgorithm joinAlgo, MapJoinStreamingRelation streamingSideForMapJoin, boolean leftSemiJoin) - throws InvalidRelException { - super(cluster, TraitsUtil.getDefaultTraitSet(cluster), left, right, condition, joinType, - variablesStopped); - this.joinAlgorithm = joinAlgo; - this.leftSemiJoin = leftSemiJoin; - } - - @Override - public void implement(Implementor implementor) { - } - - @Override - public final HiveJoinRel copy(RelTraitSet traitSet, RexNode conditionExpr, RelNode left, - RelNode right, JoinRelType joinType, boolean semiJoinDone) { - try { - Set variablesStopped = Collections.emptySet(); - return new HiveJoinRel(getCluster(), traitSet, left, right, conditionExpr, joinType, - variablesStopped, JoinAlgorithm.NONE, null, leftSemiJoin); - } catch (InvalidRelException e) { - // Semantic error not possible. Must be a bug. Convert to - // internal error. - throw new AssertionError(e); - } - } - - public JoinAlgorithm getJoinAlgorithm() { - return joinAlgorithm; - } - - public boolean isLeftSemiJoin() { - return leftSemiJoin; - } - - /** - * Model cost of join as size of Inputs. - */ - @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - double leftRCount = RelMetadataQuery.getRowCount(getLeft()); - double rightRCount = RelMetadataQuery.getRowCount(getRight()); - return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0); - } - - /** - * @return returns rowtype representing only the left join input - */ - @Override - public RelDataType deriveRowType() { - if (leftSemiJoin) { - return deriveJoinRowType(left.getRowType(), null, JoinRelType.INNER, - getCluster().getTypeFactory(), null, - Collections. emptyList()); - } - return super.deriveRowType(); - } - - private static class HiveJoinFactoryImpl implements JoinFactory { - /** - * Creates a join. 
- * - * @param left - * Left input - * @param right - * Right input - * @param condition - * Join condition - * @param joinType - * Join type - * @param variablesStopped - * Set of names of variables which are set by the LHS and used by - * the RHS and are not available to nodes above this JoinRel in the - * tree - * @param semiJoinDone - * Whether this join has been translated to a semi-join - */ - @Override - public RelNode createJoin(RelNode left, RelNode right, RexNode condition, JoinRelType joinType, - Set variablesStopped, boolean semiJoinDone) { - return getJoin(left.getCluster(), left, right, condition, joinType, false); - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveLimitRel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveLimitRel.java deleted file mode 100644 index f8755d0..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveLimitRel.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; - -import java.util.List; - -import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.SingleRel; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.relopt.RelOptCost; -import org.eigenbase.relopt.RelOptPlanner; -import org.eigenbase.relopt.RelTraitSet; -import org.eigenbase.rex.RexNode; - -public class HiveLimitRel extends SingleRel implements HiveRel { - private final RexNode offset; - private final RexNode fetch; - - HiveLimitRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, RexNode offset, - RexNode fetch) { - super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child); - this.offset = offset; - this.fetch = fetch; - assert getConvention() instanceof HiveRel; - assert getConvention() == child.getConvention(); - } - - @Override - public HiveLimitRel copy(RelTraitSet traitSet, List newInputs) { - return new HiveLimitRel(getCluster(), traitSet, sole(newInputs), offset, fetch); - } - - public void implement(Implementor implementor) { - } - - @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java deleted file mode 100644 index 7b434ea..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java +++ /dev/null @@ -1,204 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -import com.google.common.collect.ImmutableList; - -import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException; -import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost; -import org.eigenbase.rel.ProjectRelBase; -import org.eigenbase.rel.RelCollation; -import org.eigenbase.rel.RelFactories.ProjectFactory; -import org.eigenbase.rel.RelNode; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.relopt.RelOptCost; -import org.eigenbase.relopt.RelOptPlanner; -import org.eigenbase.relopt.RelTraitSet; -import org.eigenbase.reltype.RelDataType; -import org.eigenbase.reltype.RelDataTypeField; -import org.eigenbase.rex.RexBuilder; -import org.eigenbase.rex.RexNode; -import org.eigenbase.rex.RexUtil; -import org.eigenbase.util.Util; -import org.eigenbase.util.mapping.Mapping; -import org.eigenbase.util.mapping.MappingType; - -public class HiveProjectRel extends ProjectRelBase implements HiveRel { - - public static final ProjectFactory DEFAULT_PROJECT_FACTORY = new HiveProjectFactoryImpl(); - - private final List virtualCols; - - /** - * Creates a HiveProjectRel. - * - * @param cluster - * Cluster this relational expression belongs to - * @param child - * input relational expression - * @param exps - * List of expressions for the input columns - * @param rowType - * output row type - * @param flags - * values as in {@link ProjectRelBase.Flags} - */ - public HiveProjectRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, - List exps, RelDataType rowType, int flags) { - super(cluster, traitSet, child, exps, rowType, flags); - virtualCols = ImmutableList.copyOf(HiveOptiqUtil.getVirtualCols(exps)); - } - - /** - * Creates a HiveProjectRel with no sort keys. - * - * @param child - * input relational expression - * @param exps - * set of expressions for the input columns - * @param fieldNames - * aliases of the expressions - */ - public static HiveProjectRel create(RelNode child, List exps, - List fieldNames) throws OptiqSemanticException{ - RelOptCluster cluster = child.getCluster(); - - // 1 Ensure columnNames are unique - OPTIQ-411 - if (fieldNames != null && !Util.isDistinct(fieldNames)) { - String msg = "Select list contains multiple expressions with the same name." + fieldNames; - throw new OptiqSemanticException(msg); - } - RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), exps, fieldNames); - return create(cluster, child, exps, rowType, Collections. emptyList()); - } - - /** - * Creates a HiveProjectRel. - */ - public static HiveProjectRel create(RelOptCluster cluster, RelNode child, List exps, - RelDataType rowType, final List collationList) { - RelTraitSet traitSet = TraitsUtil.getDefaultTraitSet(cluster); - return new HiveProjectRel(cluster, traitSet, child, exps, rowType, Flags.BOXED); - } - - /** - * Creates a HiveProjectRel. - */ - public static HiveProjectRel create(RelOptCluster cluster, RelNode child, List exps, - RelDataType rowType, RelTraitSet traitSet, final List collationList) { - return new HiveProjectRel(cluster, traitSet, child, exps, rowType, Flags.BOXED); - } - - /** - * Creates a relational expression which projects the output fields of a - * relational expression according to a partial mapping. - * - *
<p>
- * A partial mapping is weaker than a permutation: every target has one - * source, but a source may have 0, 1 or more than one targets. Usually the - * result will have fewer fields than the source, unless some source fields - * are projected multiple times. - * - *
<p>
- * This method could optimize the result as {@link #permute} does, but does - * not at present. - * - * @param rel - * Relational expression - * @param mapping - * Mapping from source fields to target fields. The mapping type must - * obey the constraints {@link MappingType#isMandatorySource()} and - * {@link MappingType#isSingleSource()}, as does - * {@link MappingType#INVERSE_FUNCTION}. - * @param fieldNames - * Field names; if null, or if a particular entry is null, the name - * of the permuted field is used - * @return relational expression which projects a subset of the input fields - * @throws OptiqSemanticException - */ - public static RelNode projectMapping(RelNode rel, Mapping mapping, List fieldNames) throws OptiqSemanticException { - assert mapping.getMappingType().isSingleSource(); - assert mapping.getMappingType().isMandatorySource(); - - if (mapping.isIdentity()) { - return rel; - } - - final List outputNameList = new ArrayList(); - final List outputProjList = new ArrayList(); - final List fields = rel.getRowType().getFieldList(); - final RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); - - for (int i = 0; i < mapping.getTargetCount(); i++) { - int source = mapping.getSource(i); - final RelDataTypeField sourceField = fields.get(source); - outputNameList - .add(((fieldNames == null) || (fieldNames.size() <= i) || (fieldNames.get(i) == null)) ? sourceField - .getName() : fieldNames.get(i)); - outputProjList.add(rexBuilder.makeInputRef(rel, source)); - } - - return create(rel, outputProjList, outputNameList); - } - - @Override - public ProjectRelBase copy(RelTraitSet traitSet, RelNode input, List exps, - RelDataType rowType) { - assert traitSet.containsIfApplicable(HiveRel.CONVENTION); - return new HiveProjectRel(getCluster(), traitSet, input, exps, rowType, getFlags()); - } - - @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); - } - - @Override - public void implement(Implementor implementor) { - } - - public List getVirtualCols() { - return virtualCols; - } - - /** - * Implementation of {@link ProjectFactory} that returns - * {@link org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel} - * . - */ - private static class HiveProjectFactoryImpl implements ProjectFactory { - - @Override - public RelNode createProject(RelNode child, - List childExprs, List fieldNames) { - RelOptCluster cluster = child.getCluster(); - RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), childExprs, fieldNames); - RelNode project = HiveProjectRel.create(cluster, child, - childExprs, rowType, - child.getTraitSet(), Collections. emptyList()); - - return project; - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveRel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveRel.java deleted file mode 100644 index 4738c4a..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveRel.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; - -import org.eigenbase.rel.RelNode; -import org.eigenbase.relopt.Convention; - -public interface HiveRel extends RelNode { - void implement(Implementor implementor); - - /** Calling convention for relational operations that occur in Hive. */ - final Convention CONVENTION = new Convention.Impl("HIVE", HiveRel.class); - - class Implementor { - - public void visitChild(int ordinal, RelNode input) { - assert ordinal == 0; - ((HiveRel) input).implement(this); - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java deleted file mode 100644 index f85363d..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; - -import java.util.Map; - -import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; -import org.eigenbase.rel.RelCollation; -import org.eigenbase.rel.RelFactories; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.SortRel; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.relopt.RelTraitSet; -import org.eigenbase.rex.RexNode; - -import com.google.common.collect.ImmutableMap; - -public class HiveSortRel extends SortRel implements HiveRel { - - public static final HiveSortRelFactory HIVE_SORT_REL_FACTORY = new HiveSortRelFactory(); - - // NOTE: this is to work around Hive Optiq Limitations w.r.t OB. - // 1. Optiq can not accept expressions in OB; instead it needs to be expressed - // as VC in input Select. - // 2. Hive can not preserve ordering through select boundaries. - // 3. This map is used for outermost OB to migrate the VC corresponding OB - // expressions from input select. - // 4. 
This is used by ASTConverter after we are done with Optiq Planning - private ImmutableMap mapOfInputRefToRexCall; - - public HiveSortRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, - RelCollation collation, RexNode offset, RexNode fetch) { - super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation, - offset, fetch); - } - - @Override - public HiveSortRel copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation, - RexNode offset, RexNode fetch) { - // TODO: can we blindly copy sort trait? What if inputs changed and we - // are now sorting by different cols - RelCollation canonizedCollation = traitSet.canonize(newCollation); - return new HiveSortRel(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch); - } - - public RexNode getFetchExpr() { - return fetch; - } - - public void setInputRefToCallMap(ImmutableMap refToCall) { - this.mapOfInputRefToRexCall = refToCall; - } - - public Map getInputRefToCallMap() { - return this.mapOfInputRefToRexCall; - } - - @Override - public void implement(Implementor implementor) { - } - - private static class HiveSortRelFactory implements RelFactories.SortFactory { - - @Override - public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation, - RexNode offset, RexNode fetch) { - return new HiveSortRel(child.getCluster(), traits, child, collation, offset, fetch); - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java deleted file mode 100644 index bd66459..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; - -import java.util.List; - -import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable; -import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost; -import org.apache.hadoop.hive.ql.plan.ColStatistics; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.TableAccessRelBase; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.relopt.RelOptCost; -import org.eigenbase.relopt.RelOptPlanner; -import org.eigenbase.relopt.RelTraitSet; -import org.eigenbase.reltype.RelDataType; - - -/** - * Relational expression representing a scan of a HiveDB collection. - * - *
<p>
- * Additional operations might be applied, using the "find" or "aggregate" - * methods. - *
</p>
- */ -public class HiveTableScanRel extends TableAccessRelBase implements HiveRel { - - /** - * Creates a HiveTableScan. - * - * @param cluster - * Cluster - * @param traitSet - * Traits - * @param table - * Table - * @param table - * HiveDB table - */ - public HiveTableScanRel(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, - RelDataType rowtype) { - super(cluster, TraitsUtil.getDefaultTraitSet(cluster), table); - assert getConvention() == HiveRel.CONVENTION; - } - - @Override - public RelNode copy(RelTraitSet traitSet, List inputs) { - assert inputs.isEmpty(); - return this; - } - - @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); - } - - @Override - public void register(RelOptPlanner planner) { - - } - - @Override - public void implement(Implementor implementor) { - - } - - @Override - public double getRows() { - return ((RelOptHiveTable) table).getRowCount(); - } - - public List getColStat(List projIndxLst) { - return ((RelOptHiveTable) table).getColStat(projIndxLst); - } -} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java deleted file mode 100644 index d34fe95..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; - -import java.util.List; - -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel.Implementor; -import org.eigenbase.rel.RelFactories; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.SetOpRel; -import org.eigenbase.rel.UnionRelBase; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.relopt.RelTraitSet; -import org.eigenbase.sql.SqlKind; - -public class HiveUnionRel extends UnionRelBase { - - public static final HiveUnionRelFactory UNION_REL_FACTORY = new HiveUnionRelFactory(); - - public HiveUnionRel(RelOptCluster cluster, RelTraitSet traits, List inputs) { - super(cluster, traits, inputs, true); - } - - @Override - public SetOpRel copy(RelTraitSet traitSet, List inputs, boolean all) { - return new HiveUnionRel(this.getCluster(), traitSet, inputs); - } - - public void implement(Implementor implementor) { - } - - private static class HiveUnionRelFactory implements RelFactories.SetOpFactory { - - @Override - public RelNode createSetOp(SqlKind kind, List inputs, boolean all) { - if (kind != SqlKind.UNION) { - throw new IllegalStateException("Expected to get Set operator of type Union. 
Found : " + kind); - } - return new HiveUnionRel(inputs.get(0).getCluster(), inputs.get(0).getTraitSet(), inputs); - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveMergeProjectRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveMergeProjectRule.java deleted file mode 100644 index d6581e6..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveMergeProjectRule.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.rules; - -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel; -import org.eigenbase.rel.rules.MergeProjectRule; - -//Currently not used, turn this on later -public class HiveMergeProjectRule extends MergeProjectRule { - public static final HiveMergeProjectRule INSTANCE = new HiveMergeProjectRule(); - - public HiveMergeProjectRule() { - super(true, HiveProjectRel.DEFAULT_PROJECT_FACTORY); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java deleted file mode 100644 index ee19a6c..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.rules; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; -import org.eigenbase.rel.FilterRelBase; -import org.eigenbase.relopt.RelOptRule; -import org.eigenbase.relopt.RelOptRuleCall; -import org.eigenbase.rex.RexNode; -import org.eigenbase.util.Pair; - -public class HivePartitionPrunerRule extends RelOptRule { - - HiveConf conf; - - public HivePartitionPrunerRule(HiveConf conf) { - super(operand(HiveFilterRel.class, operand(HiveTableScanRel.class, none()))); - this.conf = conf; - } - - @Override - public void onMatch(RelOptRuleCall call) { - HiveFilterRel filter = call.rel(0); - HiveTableScanRel tScan = call.rel(1); - perform(call, filter, tScan); - } - - protected void perform(RelOptRuleCall call, FilterRelBase filter, - HiveTableScanRel tScan) { - - RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable(); - RexNode predicate = filter.getCondition(); - - Pair predicates = PartitionPruner - .extractPartitionPredicates(filter.getCluster(), hiveTable, predicate); - RexNode partColExpr = predicates.left; - hiveTable.computePartitionList(conf, partColExpr); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java deleted file mode 100644 index 1c483ea..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java +++ /dev/null @@ -1,159 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.rules; - -import java.util.BitSet; -import java.util.List; -import java.util.ListIterator; - -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel; -import org.eigenbase.rel.FilterRelBase; -import org.eigenbase.rel.JoinRelBase; -import org.eigenbase.rel.JoinRelType; -import org.eigenbase.rel.RelFactories; -import org.eigenbase.rel.rules.PushFilterPastJoinRule; -import org.eigenbase.relopt.RelOptRule; -import org.eigenbase.relopt.RelOptRuleCall; -import org.eigenbase.relopt.RelOptRuleOperand; -import org.eigenbase.relopt.RelOptUtil.InputFinder; -import org.eigenbase.rex.RexCall; -import org.eigenbase.rex.RexNode; -import org.eigenbase.sql.SqlKind; - -public abstract class HivePushFilterPastJoinRule extends PushFilterPastJoinRule { - - public static final HivePushFilterPastJoinRule FILTER_ON_JOIN = new HivePushFilterIntoJoinRule(); - - public static final HivePushFilterPastJoinRule JOIN = new HivePushDownJoinConditionRule(); - - /** - * Creates a PushFilterPastJoinRule with an explicit root operand. - */ - protected HivePushFilterPastJoinRule(RelOptRuleOperand operand, String id, boolean smart, - RelFactories.FilterFactory filterFactory, RelFactories.ProjectFactory projectFactory) { - super(operand, id, smart, filterFactory, projectFactory); - } - - /** - * Rule that tries to push filter expressions into a join condition and into - * the inputs of the join. - */ - public static class HivePushFilterIntoJoinRule extends HivePushFilterPastJoinRule { - public HivePushFilterIntoJoinRule() { - super(RelOptRule.operand(FilterRelBase.class, - RelOptRule.operand(JoinRelBase.class, RelOptRule.any())), - "HivePushFilterPastJoinRule:filter", true, HiveFilterRel.DEFAULT_FILTER_FACTORY, - HiveProjectRel.DEFAULT_PROJECT_FACTORY); - } - - @Override - public void onMatch(RelOptRuleCall call) { - FilterRelBase filter = call.rel(0); - JoinRelBase join = call.rel(1); - super.perform(call, filter, join); - } - } - - public static class HivePushDownJoinConditionRule extends HivePushFilterPastJoinRule { - public HivePushDownJoinConditionRule() { - super(RelOptRule.operand(JoinRelBase.class, RelOptRule.any()), - "HivePushFilterPastJoinRule:no-filter", true, HiveFilterRel.DEFAULT_FILTER_FACTORY, - HiveProjectRel.DEFAULT_PROJECT_FACTORY); - } - - @Override - public void onMatch(RelOptRuleCall call) { - JoinRelBase join = call.rel(0); - super.perform(call, null, join); - } - } - - /* - * Any predicates pushed down to joinFilters that aren't equality conditions: - * put them back as aboveFilters because Hive doesn't support not equi join - * conditions. - */ - @Override - protected void validateJoinFilters(List aboveFilters, List joinFilters, - JoinRelBase join, JoinRelType joinType) { - if (joinType.equals(JoinRelType.INNER)) { - ListIterator filterIter = joinFilters.listIterator(); - while (filterIter.hasNext()) { - RexNode exp = filterIter.next(); - - if (exp instanceof RexCall) { - RexCall c = (RexCall) exp; - boolean validHiveJoinFilter = false; - - if ((c.getOperator().getKind() == SqlKind.EQUALS)) { - validHiveJoinFilter = true; - for (RexNode rn : c.getOperands()) { - // NOTE: Hive dis-allows projections from both left & right side - // of join condition. Example: Hive disallows - // (r1.x +r2.x)=(r1.y+r2.y) on join condition. 
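          // Editorial example (not in the original patch): for
          //   t1 JOIN t2 ON (t1.x + t2.x) = (t1.y + t2.y)
          // each operand of the EQUALS references columns from both inputs,
          // so the check below rejects it and the predicate is moved back
          // to aboveFilters rather than kept in the join condition.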
- if (filterRefersToBothSidesOfJoin(rn, join)) { - validHiveJoinFilter = false; - break; - } - } - } else if ((c.getOperator().getKind() == SqlKind.LESS_THAN) - || (c.getOperator().getKind() == SqlKind.GREATER_THAN) - || (c.getOperator().getKind() == SqlKind.LESS_THAN_OR_EQUAL) - || (c.getOperator().getKind() == SqlKind.GREATER_THAN_OR_EQUAL)) { - validHiveJoinFilter = true; - // NOTE: Hive dis-allows projections from both left & right side of - // join in in equality condition. Example: Hive disallows (r1.x < - // r2.x) on join condition. - if (filterRefersToBothSidesOfJoin(c, join)) { - validHiveJoinFilter = false; - } - } - - if (validHiveJoinFilter) - continue; - } - - aboveFilters.add(exp); - filterIter.remove(); - } - } - } - - private boolean filterRefersToBothSidesOfJoin(RexNode filter, JoinRelBase j) { - boolean refersToBothSides = false; - - int joinNoOfProjects = j.getRowType().getFieldCount(); - BitSet filterProjs = new BitSet(joinNoOfProjects); - BitSet allLeftProjs = new BitSet(joinNoOfProjects); - BitSet allRightProjs = new BitSet(joinNoOfProjects); - allLeftProjs.set(0, j.getInput(0).getRowType().getFieldCount(), true); - allRightProjs.set(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects, true); - - InputFinder inputFinder = new InputFinder(filterProjs); - filter.accept(inputFinder); - - if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs)) - refersToBothSides = true; - - return refersToBothSides; - } -} - -// End PushFilterPastJoinRule.java - diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java deleted file mode 100644 index bdc8373..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java +++ /dev/null @@ -1,207 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.rules; - -import java.util.ArrayList; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable; -import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.reltype.RelDataType; -import org.eigenbase.reltype.RelDataTypeField; -import org.eigenbase.rex.RexCall; -import org.eigenbase.rex.RexInputRef; -import org.eigenbase.rex.RexLiteral; -import org.eigenbase.rex.RexNode; -import org.eigenbase.rex.RexVisitorImpl; -import org.eigenbase.sql.fun.SqlStdOperatorTable; -import org.eigenbase.util.Pair; - -public class PartitionPruner { - - /** - * Breaks the predicate into 2 pieces. The first piece is the expressions that - * only contain partition columns and can be used for Partition Pruning; the - * second piece is the predicates that are left. - * - * @param cluster - * @param hiveTable - * @param predicate - * @return a Pair of expressions, each of which maybe null. The 1st predicate - * is expressions that only contain partition columns; the 2nd - * predicate contains the remaining predicates. - */ - public static Pair extractPartitionPredicates( - RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) { - RexNode partitionPruningPred = predicate - .accept(new ExtractPartPruningPredicate(cluster, hiveTable)); - RexNode remainingPred = predicate.accept(new ExtractRemainingPredicate( - cluster, partitionPruningPred)); - return new Pair(partitionPruningPred, remainingPred); - } - - public static class ExtractPartPruningPredicate extends - RexVisitorImpl { - - final RelOptHiveTable hiveTable; - final RelDataType rType; - final Set partCols; - final RelOptCluster cluster; - - public ExtractPartPruningPredicate(RelOptCluster cluster, - RelOptHiveTable hiveTable) { - super(true); - this.hiveTable = hiveTable; - rType = hiveTable.getRowType(); - List pfs = hiveTable.getHiveTableMD().getPartCols(); - partCols = new HashSet(); - for (FieldSchema pf : pfs) { - partCols.add(pf.getName()); - } - this.cluster = cluster; - } - - @Override - public RexNode visitLiteral(RexLiteral literal) { - return literal; - } - - @Override - public RexNode visitInputRef(RexInputRef inputRef) { - RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex()); - if (partCols.contains(f.getName())) { - return inputRef; - } else { - return null; - } - } - - @Override - public RexNode visitCall(RexCall call) { - if (!deep) { - return null; - } - - List args = new LinkedList(); - boolean argsPruned = false; - - GenericUDF hiveUDF = SqlFunctionConverter.getHiveUDF(call.getOperator(), - call.getType(), call.operands.size()); - if (hiveUDF != null && - !FunctionRegistry.isDeterministic(hiveUDF)) { - return null; - } - - for (RexNode operand : call.operands) { - RexNode n = operand.accept(this); - if (n != null) { - args.add(n); - } else { - argsPruned = true; - } - } - - if (call.getOperator() != SqlStdOperatorTable.AND) { - return argsPruned ? 
null : call; - } else { - if (args.size() == 0) { - return null; - } else if (args.size() == 1) { - return args.get(0); - } else { - return cluster.getRexBuilder().makeCall(call.getOperator(), args); - } - } - } - - } - - public static class ExtractRemainingPredicate extends RexVisitorImpl { - - List pruningPredicates; - final RelOptCluster cluster; - - public ExtractRemainingPredicate(RelOptCluster cluster, - RexNode partPruningExpr) { - super(true); - this.cluster = cluster; - pruningPredicates = new ArrayList(); - flattenPredicates(partPruningExpr); - } - - private void flattenPredicates(RexNode r) { - if (r instanceof RexCall - && ((RexCall) r).getOperator() == SqlStdOperatorTable.AND) { - for (RexNode c : ((RexCall) r).getOperands()) { - flattenPredicates(c); - } - } else { - pruningPredicates.add(r); - } - } - - @Override - public RexNode visitLiteral(RexLiteral literal) { - return literal; - } - - @Override - public RexNode visitInputRef(RexInputRef inputRef) { - return inputRef; - } - - @Override - public RexNode visitCall(RexCall call) { - if (!deep) { - return null; - } - - if (call.getOperator() != SqlStdOperatorTable.AND) { - if (pruningPredicates.contains(call)) { - return null; - } else { - return call; - } - } - - List args = new LinkedList(); - - for (RexNode operand : call.operands) { - RexNode n = operand.accept(this); - if (n != null) { - args.add(n); - } - } - - if (args.size() == 0) { - return null; - } else if (args.size() == 1) { - return args.get(0); - } else { - return cluster.getRexBuilder().makeCall(call.getOperator(), args); - } - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java deleted file mode 100644 index 28bf2ad..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java +++ /dev/null @@ -1,261 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
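The two Rex visitors above implement this split recursively; on an already-flattened conjunction the effect is simply to route each conjunct by whether it touches only partition columns. A simplified sketch under that assumption (it ignores the non-deterministic-UDF check, and Strings plus an explicit column map stand in for RexNode and the input-ref walk):

    import java.util.AbstractMap;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    public class PartitionPredicateSplit {
      // Route each AND-ed conjunct: partition-columns-only -> pruning
      // predicate, anything else -> remainder, mirroring the Pair returned
      // by extractPartitionPredicates above.
      static Map.Entry<List<String>, List<String>> split(List<String> conjuncts,
          Map<String, Set<String>> colsPerConjunct, Set<String> partCols) {
        List<String> pruning = new ArrayList<String>();
        List<String> remainder = new ArrayList<String>();
        for (String c : conjuncts) {
          if (partCols.containsAll(colsPerConjunct.get(c))) {
            pruning.add(c);
          } else {
            remainder.add(c);
          }
        }
        return new AbstractMap.SimpleEntry<List<String>, List<String>>(pruning, remainder);
      }

      public static void main(String[] args) {
        Map<String, Set<String>> cols = new HashMap<String, Set<String>>();
        cols.put("ds = '2014-08-28'", new HashSet<String>(Arrays.asList("ds")));
        cols.put("amount > 100", new HashSet<String>(Arrays.asList("amount")));
        Map.Entry<List<String>, List<String>> p = split(
            Arrays.asList("ds = '2014-08-28'", "amount > 100"), cols,
            new HashSet<String>(Arrays.asList("ds")));
        System.out.println(p.getKey());    // [ds = '2014-08-28'] -> partition pruning
        System.out.println(p.getValue());  // [amount > 100]      -> remaining filter
      }
    }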
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.stats; - -import java.util.BitSet; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; -import org.eigenbase.rel.FilterRelBase; -import org.eigenbase.rel.ProjectRelBase; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.metadata.RelMetadataQuery; -import org.eigenbase.relopt.RelOptUtil; -import org.eigenbase.relopt.RelOptUtil.InputReferencedVisitor; -import org.eigenbase.rex.RexCall; -import org.eigenbase.rex.RexInputRef; -import org.eigenbase.rex.RexNode; -import org.eigenbase.rex.RexVisitorImpl; -import org.eigenbase.sql.SqlKind; -import org.eigenbase.sql.SqlOperator; -import org.eigenbase.sql.type.SqlTypeUtil; - -import com.google.common.collect.Sets; - -public class FilterSelectivityEstimator extends RexVisitorImpl { - private final RelNode childRel; - private final double childCardinality; - - protected FilterSelectivityEstimator(RelNode childRel) { - super(true); - this.childRel = childRel; - this.childCardinality = RelMetadataQuery.getRowCount(childRel); - } - - public Double estimateSelectivity(RexNode predicate) { - return predicate.accept(this); - } - - public Double visitCall(RexCall call) { - if (!deep) { - return 1.0; - } - - /* - * Ignore any predicates on partition columns because we have already - * accounted for these in the Table row count. - */ - if (isPartitionPredicate(call, this.childRel)) { - return 1.0; - } - - Double selectivity = null; - SqlKind op = getOp(call); - - switch (op) { - case AND: { - selectivity = computeConjunctionSelectivity(call); - break; - } - - case OR: { - selectivity = computeDisjunctionSelectivity(call); - break; - } - - case NOT: - case NOT_EQUALS: { - selectivity = computeNotEqualitySelectivity(call); - break; - } - - case LESS_THAN_OR_EQUAL: - case GREATER_THAN_OR_EQUAL: - case LESS_THAN: - case GREATER_THAN: { - selectivity = ((double) 1 / (double) 3); - break; - } - - case IN: { - // TODO: 1) check for duplicates 2) We assume in clause values to be - // present in NDV which may not be correct (Range check can find it) 3) We - // assume values in NDV set is uniformly distributed over col values - // (account for skewness - histogram). - selectivity = computeFunctionSelectivity(call) * (call.operands.size() - 1); - if (selectivity <= 0.0) { - selectivity = 0.10; - } else if (selectivity >= 1.0) { - selectivity = 1.0; - } - break; - } - - default: - selectivity = computeFunctionSelectivity(call); - } - - return selectivity; - } - - /** - * NDV of "f1(x, y, z) != f2(p, q, r)" -> - * "(maxNDV(x,y,z,p,q,r) - 1)/maxNDV(x,y,z,p,q,r)". - *
- * - * @param call - * @return - */ - private Double computeNotEqualitySelectivity(RexCall call) { - double tmpNDV = getMaxNDV(call); - - if (tmpNDV > 1) - return (tmpNDV - (double) 1) / tmpNDV; - else - return 1.0; - } - - /** - * Selectivity of f(X,y,z) -> 1/maxNDV(x,y,z). - *
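Concretely, for a call whose inputs have a maximum NDV of 50, the two rules above give 0.98 for "!=" and 0.02 for "=" and any other generic function:

    public class PointSelectivity {
      public static void main(String[] args) {
        double maxNdv = 50.0;                       // maxNDV over the call's inputs
        System.out.println((maxNdv - 1) / maxNdv);  // "!=": (50 - 1) / 50 = 0.98
        System.out.println(1 / maxNdv);             // "=", generic functions: 0.02
      }
    }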
- * Note that >, >=, <, <=, = ... are considered generic functions and uses - * this method to find their selectivity. - * - * @param call - * @return - */ - private Double computeFunctionSelectivity(RexCall call) { - return 1 / getMaxNDV(call); - } - - /** - * Disjunction Selectivity -> (1 D(1-m1/n)(1-m2/n)) where n is the total - * number of tuples from child and m1 and m2 is the expected number of tuples - * from each part of the disjunction predicate. - *
- * Note we compute m1. m2.. by applying selectivity of the disjunctive element - * on the cardinality from child. - * - * @param call - * @return - */ - private Double computeDisjunctionSelectivity(RexCall call) { - Double tmpCardinality; - Double tmpSelectivity; - double selectivity = 1; - - for (RexNode dje : call.getOperands()) { - tmpSelectivity = dje.accept(this); - if (tmpSelectivity == null) { - tmpSelectivity = 0.99; - } - tmpCardinality = childCardinality * tmpSelectivity; - - if (tmpCardinality > 1 && tmpCardinality < childCardinality) { - tmpSelectivity = (1 - tmpCardinality / childCardinality); - } else { - tmpSelectivity = 1.0; - } - - selectivity *= tmpSelectivity; - } - - if (selectivity < 0.0) - selectivity = 0.0; - - return (1 - selectivity); - } - - /** - * Selectivity of conjunctive predicate -> (selectivity of conjunctive - * element1) * (selectivity of conjunctive element2)... - * - * @param call - * @return - */ - private Double computeConjunctionSelectivity(RexCall call) { - Double tmpSelectivity; - double selectivity = 1; - - for (RexNode cje : call.getOperands()) { - tmpSelectivity = cje.accept(this); - if (tmpSelectivity != null) { - selectivity *= tmpSelectivity; - } - } - - return selectivity; - } - - private Double getMaxNDV(RexCall call) { - double tmpNDV; - double maxNDV = 1.0; - InputReferencedVisitor irv; - - for (RexNode op : call.getOperands()) { - if (op instanceof RexInputRef) { - tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, - ((RexInputRef) op).getIndex()); - if (tmpNDV > maxNDV) - maxNDV = tmpNDV; - } else { - irv = new InputReferencedVisitor(); - irv.apply(op); - for (Integer childProjIndx : irv.inputPosReferenced) { - tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, childProjIndx); - if (tmpNDV > maxNDV) - maxNDV = tmpNDV; - } - } - } - - return maxNDV; - } - - private boolean isPartitionPredicate(RexNode expr, RelNode r) { - if (r instanceof ProjectRelBase) { - expr = RelOptUtil.pushFilterPastProject(expr, (ProjectRelBase) r); - return isPartitionPredicate(expr, ((ProjectRelBase) r).getChild()); - } else if (r instanceof FilterRelBase) { - return isPartitionPredicate(expr, ((FilterRelBase) r).getChild()); - } else if (r instanceof HiveTableScanRel) { - RelOptHiveTable table = (RelOptHiveTable) ((HiveTableScanRel) r).getTable(); - BitSet cols = RelOptUtil.InputFinder.bits(expr); - return table.containsPartitionColumnsOnly(cols); - } - return false; - } - - private SqlKind getOp(RexCall call) { - SqlKind op = call.getKind(); - - if (call.getKind().equals(SqlKind.OTHER_FUNCTION) - && SqlTypeUtil.inBooleanFamily(call.getType())) { - SqlOperator sqlOp = call.getOperator(); - String opName = (sqlOp != null) ? sqlOp.getName() : ""; - if (opName.equalsIgnoreCase("in")) { - op = SqlKind.IN; - } - } - - return op; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java deleted file mode 100644 index 4be57b1..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java +++ /dev/null @@ -1,127 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
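Under the usual independence assumption, the conjunction and disjunction rules above compose per-branch selectivities as a product and as one minus the product of complements, respectively (the disjunction code additionally clamps each branch's implied cardinality to the open interval (1, n) before composing). A worked example with two branches:

    public class AndOrSelectivity {
      public static void main(String[] args) {
        double s1 = 0.2, s2 = 0.5;                    // per-branch selectivities
        System.out.println(s1 * s2);                  // AND: 0.1
        System.out.println(1 - (1 - s1) * (1 - s2));  // OR:  0.6
      }
    }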
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.stats; - -import java.util.BitSet; -import java.util.List; - -import net.hydromatic.optiq.BuiltinMethod; - -import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; -import org.apache.hadoop.hive.ql.plan.ColStatistics; -import org.eigenbase.rel.JoinRelBase; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.metadata.ChainedRelMetadataProvider; -import org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider; -import org.eigenbase.rel.metadata.RelMdDistinctRowCount; -import org.eigenbase.rel.metadata.RelMdUtil; -import org.eigenbase.rel.metadata.RelMetadataProvider; -import org.eigenbase.rel.metadata.RelMetadataQuery; -import org.eigenbase.relopt.RelOptCost; -import org.eigenbase.rex.RexNode; - -import com.google.common.collect.ImmutableList; - -public class HiveRelMdDistinctRowCount extends RelMdDistinctRowCount { - - private static final HiveRelMdDistinctRowCount INSTANCE = - new HiveRelMdDistinctRowCount(); - - public static final RelMetadataProvider SOURCE = ChainedRelMetadataProvider - .of(ImmutableList.of( - - ReflectiveRelMetadataProvider.reflectiveSource( - BuiltinMethod.DISTINCT_ROW_COUNT.method, INSTANCE), - - ReflectiveRelMetadataProvider.reflectiveSource( - BuiltinMethod.CUMULATIVE_COST.method, INSTANCE))); - - private HiveRelMdDistinctRowCount() { - } - - // Catch-all rule when none of the others apply. - @Override - public Double getDistinctRowCount(RelNode rel, BitSet groupKey, - RexNode predicate) { - if (rel instanceof HiveTableScanRel) { - return getDistinctRowCount((HiveTableScanRel) rel, groupKey, predicate); - } - /* - * For now use Optiq' default formulas for propagating NDVs up the Query - * Tree. 
- */ - return super.getDistinctRowCount(rel, groupKey, predicate); - } - - private Double getDistinctRowCount(HiveTableScanRel htRel, BitSet groupKey, - RexNode predicate) { - List projIndxLst = HiveOptiqUtil - .translateBitSetToProjIndx(groupKey); - List colStats = htRel.getColStat(projIndxLst); - Double noDistinctRows = 1.0; - for (ColStatistics cStat : colStats) { - noDistinctRows *= cStat.getCountDistint(); - } - - return Math.min(noDistinctRows, htRel.getRows()); - } - - public static Double getDistinctRowCount(RelNode r, int indx) { - BitSet bitSetOfRqdProj = new BitSet(); - bitSetOfRqdProj.set(indx); - return RelMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r - .getCluster().getRexBuilder().makeLiteral(true)); - } - - @Override - public Double getDistinctRowCount(JoinRelBase rel, BitSet groupKey, - RexNode predicate) { - if (rel instanceof HiveJoinRel) { - HiveJoinRel hjRel = (HiveJoinRel) rel; - //TODO: Improve this - if (hjRel.isLeftSemiJoin()) { - return RelMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey, - rel.getCluster().getRexBuilder().makeLiteral(true)); - } else { - return RelMdUtil.getJoinDistinctRowCount(rel, rel.getJoinType(), - groupKey, predicate, true); - } - } - - return RelMetadataQuery.getDistinctRowCount(rel, groupKey, predicate); - } - - /* - * Favor Broad Plans over Deep Plans. - */ - public RelOptCost getCumulativeCost(HiveJoinRel rel) { - RelOptCost cost = RelMetadataQuery.getNonCumulativeCost(rel); - List inputs = rel.getInputs(); - RelOptCost maxICost = HiveCost.ZERO; - for (RelNode input : inputs) { - RelOptCost iCost = RelMetadataQuery.getCumulativeCost(input); - if (maxICost.isLt(iCost)) { - maxICost = iCost; - } - } - return cost.plus(maxICost); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java deleted file mode 100644 index 8c7f643..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java +++ /dev/null @@ -1,443 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
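Two of the overrides above reduce to simple arithmetic: the table-scan NDV of a group key is the product of per-column NDVs capped at the row count, and the cumulative cost of a join charges only its most expensive input, which is what favors broad plans over deep ones. A sketch of both, with illustrative numbers:

    public class MdSketches {
      // Group-key NDV at a table scan: product of per-column NDVs, capped
      // at the table's row count.
      static double groupKeyNdv(double rowCount, double... columnNdvs) {
        double ndv = 1.0;
        for (double d : columnNdvs) {
          ndv *= d;
        }
        return Math.min(ndv, rowCount);
      }

      // Cumulative cost that favors broad plans: self cost plus only the
      // most expensive input, not the sum of all inputs.
      static double cumulativeCost(double selfCost, double... inputCosts) {
        double max = 0.0;
        for (double c : inputCosts) {
          max = Math.max(max, c);
        }
        return selfCost + max;
      }

      public static void main(String[] args) {
        System.out.println(groupKeyNdv(10000, 365, 50));  // min(18250, 10000) = 10000
        System.out.println(cumulativeCost(5, 40, 35));    // 5 + 40 = 45
      }
    }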
- */ - -package org.apache.hadoop.hive.ql.optimizer.optiq.stats; - -import java.util.ArrayList; -import java.util.BitSet; -import java.util.List; -import java.util.Set; - -import net.hydromatic.optiq.BuiltinMethod; -import net.hydromatic.optiq.util.BitSets; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; -import org.eigenbase.rel.FilterRelBase; -import org.eigenbase.rel.JoinRelBase; -import org.eigenbase.rel.JoinRelType; -import org.eigenbase.rel.ProjectRelBase; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.RelVisitor; -import org.eigenbase.rel.TableAccessRelBase; -import org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider; -import org.eigenbase.rel.metadata.RelMdRowCount; -import org.eigenbase.rel.metadata.RelMetadataProvider; -import org.eigenbase.rel.metadata.RelMetadataQuery; -import org.eigenbase.rel.rules.SemiJoinRel; -import org.eigenbase.relopt.RelOptUtil; -import org.eigenbase.relopt.hep.HepRelVertex; -import org.eigenbase.rex.RexBuilder; -import org.eigenbase.rex.RexCall; -import org.eigenbase.rex.RexInputRef; -import org.eigenbase.rex.RexNode; -import org.eigenbase.rex.RexUtil; -import org.eigenbase.sql.fun.SqlStdOperatorTable; -import org.eigenbase.util.Holder; -import org.eigenbase.util.Pair; - -public class HiveRelMdRowCount extends RelMdRowCount { - - protected static final Log LOG = LogFactory.getLog(HiveRelMdRowCount.class.getName()); - - - public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider - .reflectiveSource(BuiltinMethod.ROW_COUNT.method, new HiveRelMdRowCount()); - - protected HiveRelMdRowCount() { - super(); - } - - public Double getRowCount(JoinRelBase join) { - PKFKRelationInfo pkfk = analyzeJoinForPKFK(join); - if (pkfk != null) { - double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); - selectivity = Math.min(1.0, selectivity); - if (LOG.isDebugEnabled()) { - LOG.debug("Identified Primary - Foreign Key relation:"); - LOG.debug(RelOptUtil.toString(join)); - LOG.debug(pkfk); - } - return pkfk.fkInfo.rowCount * selectivity; - } - return join.getRows(); - } - - public Double getRowCount(SemiJoinRel rel) { - PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel); - if (pkfk != null) { - double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); - selectivity = Math.min(1.0, selectivity); - if (LOG.isDebugEnabled()) { - LOG.debug("Identified Primary - Foreign Key relation:"); - LOG.debug(RelOptUtil.toString(rel)); - LOG.debug(pkfk); - } - return pkfk.fkInfo.rowCount * selectivity; - } - return super.getRowCount(rel); - } - - static class PKFKRelationInfo { - public final int fkSide; - public final double ndvScalingFactor; - public final FKSideInfo fkInfo; - public final PKSideInfo pkInfo; - public final boolean isPKSideSimple; - - PKFKRelationInfo(int fkSide, - FKSideInfo fkInfo, - PKSideInfo pkInfo, - double ndvScalingFactor, - boolean isPKSideSimple) { - this.fkSide = fkSide; - this.fkInfo = fkInfo; - this.pkInfo = pkInfo; - this.ndvScalingFactor = ndvScalingFactor; - this.isPKSideSimple = isPKSideSimple; - } - - public String toString() { - return String.format( - "Primary - Foreign Key join:\n\tfkSide = %d\n\tFKInfo:%s\n" + - "\tPKInfo:%s\n\tisPKSideSimple:%s\n\tNDV Scaling Factor:%.2f\n", - fkSide, - fkInfo, - pkInfo, - isPKSideSimple, - ndvScalingFactor); - } - } - - static class FKSideInfo { - public final double rowCount; - public final double distinctCount; - public 
FKSideInfo(double rowCount, double distinctCount) { - this.rowCount = rowCount; - this.distinctCount = distinctCount; - } - - public String toString() { - return String.format("FKInfo(rowCount=%.2f,ndv=%.2f)", rowCount, distinctCount); - } - } - - static class PKSideInfo extends FKSideInfo { - public final double selectivity; - public PKSideInfo(double rowCount, double distinctCount, double selectivity) { - super(rowCount, distinctCount); - this.selectivity = selectivity; - } - - public String toString() { - return String.format("PKInfo(rowCount=%.2f,ndv=%.2f,selectivity=%.2f)", rowCount, distinctCount,selectivity); - } - } - - /* - * For T1 join T2 on T1.x = T2.y if we identify 'y' s a key of T2 then we can - * infer the join cardinality as: rowCount(T1) * selectivity(T2) i.e this is - * like a SemiJoin where the T1(Fact side/FK side) is filtered by a factor - * based on the Selectivity of the PK/Dim table side. - * - * 1. If both T1.x and T2.y are keys then use the larger one as the PK side. - * 2. In case of outer Joins: a) The FK side should be the Null Preserving - * side. It doesn't make sense to apply this heuristic in case of Dim loj Fact - * or Fact roj Dim b) The selectivity factor applied on the Fact Table should - * be 1. - */ - public static PKFKRelationInfo analyzeJoinForPKFK(JoinRelBase joinRel) { - - RelNode left = joinRel.getInputs().get(0); - RelNode right = joinRel.getInputs().get(1); - - final List initJoinFilters = RelOptUtil.conjunctions(joinRel - .getCondition()); - - /* - * No joining condition. - */ - if (initJoinFilters.isEmpty()) { - return null; - } - - List leftFilters = new ArrayList(); - List rightFilters = new ArrayList(); - List joinFilters = new ArrayList(initJoinFilters); - final Holder joinTypeHolder = Holder.of(joinRel.getJoinType()); - - // @todo: remove this. 8/28/14 hb - // for now adding because RelOptUtil.classifyFilters has an assertion about - // column counts that is not true for semiJoins. - if (joinRel instanceof SemiJoinRel) { - return null; - } - - RelOptUtil.classifyFilters(joinRel, joinFilters, joinRel.getJoinType(), - false, !joinRel.getJoinType().generatesNullsOnRight(), !joinRel - .getJoinType().generatesNullsOnLeft(), joinFilters, leftFilters, - rightFilters); - - Pair joinCols = canHandleJoin(joinRel, leftFilters, - rightFilters, joinFilters); - if (joinCols == null) { - return null; - } - int leftColIdx = joinCols.left; - int rightColIdx = joinCols.right; - - RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); - RexNode leftPred = RexUtil - .composeConjunction(rexBuilder, leftFilters, true); - RexNode rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters, - true); - BitSet lBitSet = BitSets.of(leftColIdx); - BitSet rBitSet = BitSets.of(rightColIdx); - - /* - * If the form is Dim loj F or Fact roj Dim or Dim semij Fact then return - * null. - */ - boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel - .getJoinType() == JoinRelType.RIGHT) - && !(joinRel instanceof SemiJoinRel) && isKey(lBitSet, left); - boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel - .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right); - - if (!leftIsKey && !rightIsKey) { - return null; - } - - double leftRowCount = RelMetadataQuery.getRowCount(left); - double rightRowCount = RelMetadataQuery.getRowCount(right); - - if (leftIsKey && rightIsKey) { - if (rightRowCount < leftRowCount) { - leftIsKey = false; - } - } - - int pkSide = leftIsKey ? 0 : rightIsKey ? 
1 : -1; - - boolean isPKSideSimpleTree = pkSide != -1 ? - IsSimpleTreeOnJoinKey.check( - pkSide == 0 ? left : right, - pkSide == 0 ? leftColIdx : rightColIdx) : false; - - double leftNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(left, lBitSet, leftPred) : -1; - double rightNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(right, rBitSet, rightPred) : -1; - - /* - * If the ndv of the PK - FK side don't match, and the PK side is a filter - * on the Key column then scale the NDV on the FK side. - * - * As described by Peter Boncz: http://databasearchitects.blogspot.com/ - * in such cases we can be off by a large margin in the Join cardinality - * estimate. The e.g. he provides is on the join of StoreSales and DateDim - * on the TPCDS dataset. Since the DateDim is populated for 20 years into - * the future, while the StoreSales only has 5 years worth of data, there - * are 40 times fewer distinct dates in StoreSales. - * - * In general it is hard to infer the range for the foreign key on an - * arbitrary expression. For e.g. the NDV for DayofWeek is the same - * irrespective of NDV on the number of unique days, whereas the - * NDV of Quarters has the same ratio as the NDV on the keys. - * - * But for expressions that apply only on columns that have the same NDV - * as the key (implying that they are alternate keys) we can apply the - * ratio. So in the case of StoreSales - DateDim joins for predicate on the - * d_date column we can apply the scaling factor. - */ - double ndvScalingFactor = 1.0; - if ( isPKSideSimpleTree ) { - ndvScalingFactor = pkSide == 0 ? leftNDV/rightNDV : rightNDV / leftNDV; - } - - if (pkSide == 0) { - FKSideInfo fkInfo = new FKSideInfo(rightRowCount, - rightNDV); - double pkSelectivity = pkSelectivity(joinRel, true, left, leftRowCount); - PKSideInfo pkInfo = new PKSideInfo(leftRowCount, - leftNDV, - joinRel.getJoinType().generatesNullsOnRight() ? 1.0 : - pkSelectivity); - - return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); - } - - if (pkSide == 1) { - FKSideInfo fkInfo = new FKSideInfo(leftRowCount, - leftNDV); - double pkSelectivity = pkSelectivity(joinRel, false, right, rightRowCount); - PKSideInfo pkInfo = new PKSideInfo(rightRowCount, - rightNDV, - joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 : - pkSelectivity); - - return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); - } - - return null; - } - - private static double pkSelectivity(JoinRelBase joinRel, boolean leftChild, - RelNode child, - double childRowCount) { - if ((leftChild && joinRel.getJoinType().generatesNullsOnRight()) || - (!leftChild && joinRel.getJoinType().generatesNullsOnLeft())) { - return 1.0; - } else { - HiveTableScanRel tScan = HiveRelMdUniqueKeys.getTableScan(child, true); - if (tScan != null) { - double tRowCount = RelMetadataQuery.getRowCount(tScan); - return childRowCount / tRowCount; - } else { - return 1.0; - } - } - } - - private static boolean isKey(BitSet c, RelNode rel) { - boolean isKey = false; - Set keys = RelMetadataQuery.getUniqueKeys(rel); - if (keys != null) { - for (BitSet key : keys) { - if (key.equals(c)) { - isKey = true; - break; - } - } - } - return isKey; - } - - /* - * 1. Join condition must be an Equality Predicate. - * 2. both sides must reference 1 column. - * 3. If needed flip the columns. 
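Putting the pieces together, getRowCount above computes fkRowCount * min(1, pkSelectivity * ndvScalingFactor). A numeric sketch with hypothetical values (not taken from any benchmark):

    public class PkFkEstimate {
      public static void main(String[] args) {
        double fkRowCount = 1000000;      // fact (FK) side row count
        double pkSelectivity = 0.05;      // filtered dim rows / base dim rows
        double ndvScalingFactor = 4.0;    // PK-side NDV / FK-side NDV on the key
        double selectivity = Math.min(1.0, pkSelectivity * ndvScalingFactor);
        System.out.println(fkRowCount * selectivity);  // 200000.0 estimated join rows
      }
    }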
- */ - private static Pair canHandleJoin(JoinRelBase joinRel, - List leftFilters, List rightFilters, - List joinFilters) { - - /* - * If after classifying filters there is more than 1 joining predicate, we - * don't handle this. Return null. - */ - if (joinFilters.size() != 1) { - return null; - } - - RexNode joinCond = joinFilters.get(0); - - int leftColIdx; - int rightColIdx; - - if (!(joinCond instanceof RexCall)) { - return null; - } - - if (((RexCall) joinCond).getOperator() != SqlStdOperatorTable.EQUALS) { - return null; - } - - BitSet leftCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(0)); - BitSet rightCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(1)); - - if (leftCols.cardinality() != 1 || rightCols.cardinality() != 1 ) { - return null; - } - - int nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size(); - int nFieldsRight = joinRel.getRight().getRowType().getFieldList().size(); - int nSysFields = joinRel.getSystemFieldList().size(); - BitSet rightFieldsBitSet = BitSets.range(nSysFields + nFieldsLeft, - nSysFields + nFieldsLeft + nFieldsRight); - /* - * flip column references if join condition specified in reverse order to - * join sources. - */ - if (BitSets.contains(rightFieldsBitSet, leftCols)) { - BitSet t = leftCols; - leftCols = rightCols; - rightCols = t; - } - - leftColIdx = leftCols.nextSetBit(0) - nSysFields; - rightColIdx = rightCols.nextSetBit(0) - (nSysFields + nFieldsLeft); - - return new Pair(leftColIdx, rightColIdx); - } - - private static class IsSimpleTreeOnJoinKey extends RelVisitor { - - int joinKey; - boolean simpleTree; - - static boolean check(RelNode r, int joinKey) { - IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey); - v.go(r); - return v.simpleTree; - } - - IsSimpleTreeOnJoinKey(int joinKey) { - super(); - this.joinKey = joinKey; - simpleTree = true; - } - - @Override - public void visit(RelNode node, int ordinal, RelNode parent) { - - if (node instanceof HepRelVertex) { - node = ((HepRelVertex) node).getCurrentRel(); - } - - if (node instanceof TableAccessRelBase) { - simpleTree = true; - } else if (node instanceof ProjectRelBase) { - simpleTree = isSimple((ProjectRelBase) node); - } else if (node instanceof FilterRelBase) { - simpleTree = isSimple((FilterRelBase) node); - } else { - simpleTree = false; - } - - if (simpleTree) { - super.visit(node, ordinal, parent); - } - } - - private boolean isSimple(ProjectRelBase project) { - RexNode r = project.getProjects().get(joinKey); - if (r instanceof RexInputRef) { - joinKey = ((RexInputRef) r).getIndex(); - return true; - } - return false; - } - - private boolean isSimple(FilterRelBase filter) { - BitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition()); - return isKey(condBits, filter); - } - - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java deleted file mode 100644 index 49d2ee5..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java +++ /dev/null @@ -1,244 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.stats; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import net.hydromatic.optiq.BuiltinMethod; - -import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil.JoinLeafPredicateInfo; -import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil.JoinPredicateInfo; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; -import org.eigenbase.rel.JoinRelType; -import org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider; -import org.eigenbase.rel.metadata.RelMdSelectivity; -import org.eigenbase.rel.metadata.RelMdUtil; -import org.eigenbase.rel.metadata.RelMetadataProvider; -import org.eigenbase.rel.metadata.RelMetadataQuery; -import org.eigenbase.rex.RexNode; -import org.eigenbase.rex.RexUtil; -import org.eigenbase.util.Pair; - -import com.google.common.collect.ImmutableMap; - -public class HiveRelMdSelectivity extends RelMdSelectivity { - public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource( - BuiltinMethod.SELECTIVITY.method, - new HiveRelMdSelectivity()); - - protected HiveRelMdSelectivity() { - super(); - } - - public Double getSelectivity(HiveTableScanRel t, RexNode predicate) { - if (predicate != null) { - FilterSelectivityEstimator filterSelEstmator = new FilterSelectivityEstimator(t); - return filterSelEstmator.estimateSelectivity(predicate); - } - - return 1.0; - } - - public Double getSelectivity(HiveJoinRel j, RexNode predicate) { - if (j.getJoinType().equals(JoinRelType.INNER)) { - return computeInnerJoinSelectivity(j, predicate); - } - return 1.0; - } - - private Double computeInnerJoinSelectivity(HiveJoinRel j, RexNode predicate) { - double ndvCrossProduct = 1; - Pair predInfo = - getCombinedPredicateForJoin(j, predicate); - if (!predInfo.getKey()) { - return - new FilterSelectivityEstimator(j). - estimateSelectivity(predInfo.getValue()); - } - - RexNode combinedPredicate = predInfo.getValue(); - JoinPredicateInfo jpi = JoinPredicateInfo.constructJoinPredicateInfo(j, - combinedPredicate); - ImmutableMap.Builder colStatMapBuilder = ImmutableMap - .builder(); - ImmutableMap colStatMap; - int rightOffSet = j.getLeft().getRowType().getFieldCount(); - - // 1. Update Col Stats Map with col stats for columns from left side of - // Join which are part of join keys - for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) { - colStatMapBuilder.put(ljk, - HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), ljk)); - } - - // 2. 
Update Col Stats Map with col stats for columns from right side of - // Join which are part of join keys - for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) { - colStatMapBuilder.put(rjk + rightOffSet, - HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), rjk)); - } - colStatMap = colStatMapBuilder.build(); - - // 3. Walk through the Join Condition Building NDV for selectivity - // NDV of the join can not exceed the cardinality of cross join. - List peLst = jpi.getEquiJoinPredicateElements(); - int noOfPE = peLst.size(); - if (noOfPE > 0) { - ndvCrossProduct = exponentialBackoff(peLst, colStatMap); - - if (j.isLeftSemiJoin()) - ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()), - ndvCrossProduct); - else - ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()) - * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct); - } - - // 4. Join Selectivity = 1/NDV - return (1 / ndvCrossProduct); - } - - // 3.2 if conjunctive predicate elements are more than one, then walk - // through them one by one. Compute cross product of NDV. Cross product is - // computed by multiplying the largest NDV of all of the conjunctive - // predicate - // elements with degraded NDV of rest of the conjunctive predicate - // elements. NDV is - // degraded using log function.Finally the ndvCrossProduct is fenced at - // the join - // cross product to ensure that NDV can not exceed worst case join - // cardinality.
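Both dampening schemes used here, the exponential backoff called above and the log smoothing described in the surrounding comments, are easiest to see numerically. The sketch below restates them order-insensitively (the actual logSmoothing tracks a running maximum rather than sorting) for NDVs 10000, 100 and 16; the resulting join selectivity is 1 over the returned value, fenced at the cross product of the input cardinalities:

    import java.util.Arrays;

    public class NdvDampening {
      // exponentialBackoff: ndv0 * ndv1^(1/2) * ndv2^(1/4) ... over NDVs in
      // descending order.
      static double exponentialBackoff(double[] ndvs) {
        double[] sorted = ndvs.clone();
        Arrays.sort(sorted);  // ascending; walked backwards below
        double product = 1.0;
        for (int i = 0; i < sorted.length; i++) {
          product *= Math.pow(sorted[sorted.length - 1 - i], Math.pow(0.5, i));
        }
        return product;
      }

      // logSmoothing: keep the largest NDV raw and log-dampen the rest, with
      // the fence that NDVs of 3 or less are multiplied in as-is.
      static double logSmoothing(double[] ndvs) {
        double[] sorted = ndvs.clone();
        Arrays.sort(sorted);
        double product = sorted[sorted.length - 1];
        for (int i = sorted.length - 2; i >= 0; i--) {
          product *= (sorted[i] > 3) ? Math.log(sorted[i]) : sorted[i];
        }
        return product;
      }

      public static void main(String[] args) {
        double[] ndvs = {100, 10000, 16};
        System.out.println(exponentialBackoff(ndvs));  // 10000 * 10 * 2 = 200000
        System.out.println(logSmoothing(ndvs));        // 10000 * ln(100) * ln(16) ~ 127681
      }
    }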
- // NDV of a conjunctive predicate element is the max NDV of all arguments - // to lhs, rhs expressions. - // NDV(JoinCondition) = min (left cardinality * right cardinality, - // ndvCrossProduct(JoinCondition)) - // ndvCrossProduct(JoinCondition) = ndv(pex)*log(ndv(pe1))*log(ndv(pe2)) - // where pex is the predicate element of join condition with max ndv. - // ndv(pe) = max(NDV(left.Expr), NDV(right.Expr)) - // NDV(expr) = max(NDV( expr args)) - protected double logSmoothing(List peLst, ImmutableMap colStatMap) { - int noOfPE = peLst.size(); - double ndvCrossProduct = getMaxNDVForJoinSelectivity(peLst.get(0), colStatMap); - if (noOfPE > 1) { - double maxNDVSoFar = ndvCrossProduct; - double ndvToBeSmoothed; - double tmpNDV; - - for (int i = 1; i < noOfPE; i++) { - tmpNDV = getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap); - if (tmpNDV > maxNDVSoFar) { - ndvToBeSmoothed = maxNDVSoFar; - maxNDVSoFar = tmpNDV; - ndvCrossProduct = (ndvCrossProduct / ndvToBeSmoothed) * tmpNDV; - } else { - ndvToBeSmoothed = tmpNDV; - } - // TODO: revisit the fence - if (ndvToBeSmoothed > 3) - ndvCrossProduct *= Math.log(ndvToBeSmoothed); - else - ndvCrossProduct *= ndvToBeSmoothed; - } - } - return ndvCrossProduct; - } - - /* - * a) Order predciates based on ndv in reverse order. b) ndvCrossProduct = - * ndv(pe0) * ndv(pe1) ^(1/2) * ndv(pe2) ^(1/4) * ndv(pe3) ^(1/8) ... - */ - protected double exponentialBackoff(List peLst, - ImmutableMap colStatMap) { - int noOfPE = peLst.size(); - List ndvs = new ArrayList(noOfPE); - for (int i = 0; i < noOfPE; i++) { - ndvs.add(getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap)); - } - Collections.sort(ndvs); - Collections.reverse(ndvs); - double ndvCrossProduct = 1.0; - for (int i = 0; i < ndvs.size(); i++) { - double n = Math.pow(ndvs.get(i), Math.pow(1 / 2.0, i)); - ndvCrossProduct *= n; - } - return ndvCrossProduct; - } - - /** - * - * @param j - * @param additionalPredicate - * @return if predicate is the join condition return (true, joinCond) - * else return (false, minusPred) - */ - private Pair getCombinedPredicateForJoin(HiveJoinRel j, RexNode additionalPredicate) { - RexNode minusPred = RelMdUtil.minusPreds(j.getCluster().getRexBuilder(), additionalPredicate, - j.getCondition()); - - if (minusPred != null) { - List minusList = new ArrayList(); - minusList.add(j.getCondition()); - minusList.add(minusPred); - - return new Pair(false, minusPred); - } - - return new Pair(true,j.getCondition()); - } - - /** - * Compute Max NDV to determine Join Selectivity. 
- * - * @param jlpi - * @param colStatMap - * Immutable Map of Projection Index (in Join Schema) to Column Stat - * @param rightProjOffSet - * @return - */ - private static Double getMaxNDVForJoinSelectivity(JoinLeafPredicateInfo jlpi, - ImmutableMap colStatMap) { - Double maxNDVSoFar = 1.0; - - maxNDVSoFar = getMaxNDVFromProjections(colStatMap, - jlpi.getProjsFromLeftPartOfJoinKeysInJoinSchema(), maxNDVSoFar); - maxNDVSoFar = getMaxNDVFromProjections(colStatMap, - jlpi.getProjsFromRightPartOfJoinKeysInJoinSchema(), maxNDVSoFar); - - return maxNDVSoFar; - } - - private static Double getMaxNDVFromProjections(Map colStatMap, - Set projectionSet, Double defaultMaxNDV) { - Double colNDV = null; - Double maxNDVSoFar = defaultMaxNDV; - - for (Integer projIndx : projectionSet) { - colNDV = colStatMap.get(projIndx); - if (colNDV > maxNDVSoFar) - maxNDVSoFar = colNDV; - } - - return maxNDVSoFar; - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java deleted file mode 100644 index c3c8bdd..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java +++ /dev/null @@ -1,139 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.optimizer.optiq.stats; - -import java.util.BitSet; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import net.hydromatic.optiq.BuiltinMethod; -import net.hydromatic.optiq.util.BitSets; - -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; -import org.apache.hadoop.hive.ql.plan.ColStatistics; -import org.eigenbase.rel.FilterRelBase; -import org.eigenbase.rel.ProjectRelBase; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.metadata.BuiltInMetadata; -import org.eigenbase.rel.metadata.Metadata; -import org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider; -import org.eigenbase.rel.metadata.RelMdUniqueKeys; -import org.eigenbase.rel.metadata.RelMetadataProvider; -import org.eigenbase.relopt.RelOptUtil; -import org.eigenbase.relopt.hep.HepRelVertex; -import org.eigenbase.rex.RexInputRef; -import org.eigenbase.rex.RexNode; - -import com.google.common.base.Function; - -public class HiveRelMdUniqueKeys { - - public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider - .reflectiveSource(BuiltinMethod.UNIQUE_KEYS.method, - new HiveRelMdUniqueKeys()); - - /* - * Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col) - * - min(col) = rowCount(col) - * - * Why are we intercepting ProjectRelbase and not TableScan? 
Because if we - * have a method for TableScan, it will not know which columns to check for. - * Inferring Uniqueness for all columns is very expensive right now. The flip - * side of doing this is, it only works post Field Trimming. - */ - public Set getUniqueKeys(ProjectRelBase rel, boolean ignoreNulls) { - - HiveTableScanRel tScan = getTableScan(rel.getChild(), false); - - if ( tScan == null ) { - Function fn = RelMdUniqueKeys.SOURCE.apply( - rel.getClass(), BuiltInMetadata.UniqueKeys.class); - return ((BuiltInMetadata.UniqueKeys) fn.apply(rel)) - .getUniqueKeys(ignoreNulls); - } - - Map posMap = new HashMap(); - int projectPos = 0; - int colStatsPos = 0; - - BitSet projectedCols = new BitSet(); - for (RexNode r : rel.getProjects()) { - if (r instanceof RexInputRef) { - projectedCols.set(((RexInputRef) r).getIndex()); - posMap.put(colStatsPos, projectPos); - colStatsPos++; - } - projectPos++; - } - - double numRows = tScan.getRows(); - List colStats = tScan.getColStat(BitSets - .toList(projectedCols)); - Set keys = new HashSet(); - - colStatsPos = 0; - for (ColStatistics cStat : colStats) { - boolean isKey = false; - if (cStat.getCountDistint() >= numRows) { - isKey = true; - } - if ( !isKey && cStat.getRange() != null && - cStat.getRange().maxValue != null && - cStat.getRange().minValue != null) { - double r = cStat.getRange().maxValue.doubleValue() - - cStat.getRange().minValue.doubleValue() + 1; - isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON); - } - if ( isKey ) { - BitSet key = new BitSet(); - key.set(posMap.get(colStatsPos)); - keys.add(key); - } - colStatsPos++; - } - - return keys; - } - - /* - * traverse a path of Filter, Projects to get to the TableScan. - * In case of Unique keys, stop if you reach a Project, it will be handled - * by the invocation on the Project. - * In case of getting the base rowCount of a Path, keep going past a Project. - */ - static HiveTableScanRel getTableScan(RelNode r, boolean traverseProject) { - - while (r != null && !(r instanceof HiveTableScanRel)) { - if (r instanceof HepRelVertex) { - r = ((HepRelVertex) r).getCurrentRel(); - } else if (r instanceof FilterRelBase) { - r = ((FilterRelBase) r).getChild(); - } else if (traverseProject && r instanceof ProjectRelBase) { - r = ((ProjectRelBase) r).getChild(); - } else { - r = null; - } - } - return r == null ? null : (HiveTableScanRel) r; - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java deleted file mode 100644 index 16e0dd9..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java +++ /dev/null @@ -1,255 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
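The uniqueness test above has two arms: the distinct count reaches the row count, or, for numeric columns with range statistics, the value range exactly covers the row count. A compact restatement (EPSILON is a stand-in for RelOptUtil.EPSILON):

    public class UniqueColumnCheck {
      private static final double EPSILON = 1.0e-5;  // stand-in for RelOptUtil.EPSILON

      static boolean isUniqueColumn(double rowCount, double distinctCount,
          Double minValue, Double maxValue) {
        if (distinctCount >= rowCount) {
          return true;  // every row has its own value
        }
        if (minValue != null && maxValue != null) {
          double range = maxValue - minValue + 1;
          return Math.abs(rowCount - range) < EPSILON;  // dense numeric key
        }
        return false;
      }

      public static void main(String[] args) {
        System.out.println(isUniqueColumn(1000, 1000, null, null));  // true
        System.out.println(isUniqueColumn(1000, 900, 1.0, 1000.0));  // true
        System.out.println(isUniqueColumn(1000, 900, null, null));   // false
      }
    }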
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.translator; - -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.Calendar; - -import net.hydromatic.avatica.ByteString; - -import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.ParseDriver; -import org.eigenbase.rel.JoinRelType; -import org.eigenbase.rel.TableAccessRelBase; -import org.eigenbase.rex.RexLiteral; -import org.eigenbase.sql.type.SqlTypeName; - -class ASTBuilder { - - static ASTBuilder construct(int tokenType, String text) { - ASTBuilder b = new ASTBuilder(); - b.curr = createAST(tokenType, text); - return b; - } - - static ASTNode createAST(int tokenType, String text) { - return (ASTNode) ParseDriver.adaptor.create(tokenType, text); - } - - static ASTNode destNode() { - return ASTBuilder - .construct(HiveParser.TOK_DESTINATION, "TOK_DESTINATION") - .add( - ASTBuilder.construct(HiveParser.TOK_DIR, "TOK_DIR").add(HiveParser.TOK_TMP_FILE, - "TOK_TMP_FILE")).node(); - } - - static ASTNode table(TableAccessRelBase scan) { - RelOptHiveTable hTbl = (RelOptHiveTable) scan.getTable(); - ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_TABREF, "TOK_TABREF").add( - ASTBuilder.construct(HiveParser.TOK_TABNAME, "TOK_TABNAME") - .add(HiveParser.Identifier, hTbl.getHiveTableMD().getDbName()) - .add(HiveParser.Identifier, hTbl.getHiveTableMD().getTableName())); - - // NOTE: Optiq considers tbls to be equal if their names are the same. Hence - // we need to provide Optiq the fully qualified table name (dbname.tblname) - // and not the user provided aliases. - // However in HIVE DB name can not appear in select list; in case of join - // where table names differ only in DB name, Hive would require user - // introducing explicit aliases for tbl. 
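ASTBuilder is a thin fluent wrapper over the ANTLR tree adaptor, and the chaining style is what keeps the translator code readable. A toy stand-in (Node replaces ASTNode, and no parser types are involved) showing the same construct/add/node shape on the TOK_TABREF tree built above:

    import java.util.ArrayList;
    import java.util.List;

    public class TreeBuilder {
      static final class Node {
        final String text;
        final List<Node> children = new ArrayList<Node>();
        Node(String text) { this.text = text; }
        @Override public String toString() {
          if (children.isEmpty()) {
            return text;
          }
          StringBuilder sb = new StringBuilder("(").append(text);
          for (Node c : children) {
            sb.append(' ').append(c);
          }
          return sb.append(')').toString();
        }
      }

      private Node curr;

      static TreeBuilder construct(String text) {
        TreeBuilder b = new TreeBuilder();
        b.curr = new Node(text);
        return b;
      }

      TreeBuilder add(String text) { curr.children.add(new Node(text)); return this; }
      TreeBuilder add(TreeBuilder b) { curr.children.add(b.curr); return this; }
      Node node() { return curr; }

      public static void main(String[] args) {
        Node tabRef = construct("TOK_TABREF")
            .add(construct("TOK_TABNAME").add("db").add("tbl"))
            .add("t")
            .node();
        System.out.println(tabRef);  // (TOK_TABREF (TOK_TABNAME db tbl) t)
      }
    }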
- b.add(HiveParser.Identifier, hTbl.getTableAlias()); - return b.node(); - } - - static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond, - boolean semiJoin) { - ASTBuilder b = null; - - switch (joinType) { - case INNER: - if (semiJoin) { - b = ASTBuilder.construct(HiveParser.TOK_LEFTSEMIJOIN, "TOK_LEFTSEMIJOIN"); - } else { - b = ASTBuilder.construct(HiveParser.TOK_JOIN, "TOK_JOIN"); - } - break; - case LEFT: - b = ASTBuilder.construct(HiveParser.TOK_LEFTOUTERJOIN, "TOK_LEFTOUTERJOIN"); - break; - case RIGHT: - b = ASTBuilder.construct(HiveParser.TOK_RIGHTOUTERJOIN, "TOK_RIGHTOUTERJOIN"); - break; - case FULL: - b = ASTBuilder.construct(HiveParser.TOK_FULLOUTERJOIN, "TOK_FULLOUTERJOIN"); - break; - } - - b.add(left).add(right).add(cond); - return b.node(); - } - - static ASTNode subQuery(ASTNode qry, String alias) { - return ASTBuilder.construct(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY").add(qry) - .add(HiveParser.Identifier, alias).node(); - } - - static ASTNode qualifiedName(String tableName, String colName) { - ASTBuilder b = ASTBuilder - .construct(HiveParser.DOT, ".") - .add( - ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add( - HiveParser.Identifier, tableName)).add(HiveParser.Identifier, colName); - return b.node(); - } - - static ASTNode unqualifiedName(String colName) { - ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add( - HiveParser.Identifier, colName); - return b.node(); - } - - static ASTNode where(ASTNode cond) { - return ASTBuilder.construct(HiveParser.TOK_WHERE, "TOK_WHERE").add(cond).node(); - } - - static ASTNode having(ASTNode cond) { - return ASTBuilder.construct(HiveParser.TOK_HAVING, "TOK_HAVING").add(cond).node(); - } - - static ASTNode limit(Object value) { - return ASTBuilder.construct(HiveParser.TOK_LIMIT, "TOK_LIMIT") - .add(HiveParser.Number, value.toString()).node(); - } - - static ASTNode selectExpr(ASTNode expr, String alias) { - return ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR").add(expr) - .add(HiveParser.Identifier, alias).node(); - } - - static ASTNode literal(RexLiteral literal) { - return literal(literal, false); - } - - static ASTNode literal(RexLiteral literal, boolean useTypeQualInLiteral) { - Object val = null; - int type = 0; - SqlTypeName sqlType = literal.getType().getSqlTypeName(); - - switch (sqlType) { - case BINARY: - ByteString bs = (ByteString) literal.getValue(); - val = bs.byteAt(0); - type = HiveParser.BigintLiteral; - break; - case TINYINT: - if (useTypeQualInLiteral) { - val = literal.getValue3() + "Y"; - } else { - val = literal.getValue3(); - } - type = HiveParser.TinyintLiteral; - break; - case SMALLINT: - if (useTypeQualInLiteral) { - val = literal.getValue3() + "S"; - } else { - val = literal.getValue3(); - } - type = HiveParser.SmallintLiteral; - break; - case INTEGER: - val = literal.getValue3(); - type = HiveParser.BigintLiteral; - break; - case BIGINT: - if (useTypeQualInLiteral) { - val = literal.getValue3() + "L"; - } else { - val = literal.getValue3(); - } - type = HiveParser.BigintLiteral; - break; - case DOUBLE: - val = literal.getValue3() + "D"; - type = HiveParser.Number; - break; - case DECIMAL: - val = literal.getValue3() + "BD"; - type = HiveParser.DecimalLiteral; - break; - case FLOAT: - case REAL: - val = literal.getValue3(); - type = HiveParser.Number; - break; - case VARCHAR: - case CHAR: - val = literal.getValue3(); - String escapedVal = BaseSemanticAnalyzer.escapeSQLString(String.valueOf(val)); - type = 
HiveParser.StringLiteral; - val = "'" + escapedVal + "'"; - break; - case BOOLEAN: - val = literal.getValue3(); - type = ((Boolean) val).booleanValue() ? HiveParser.KW_TRUE : HiveParser.KW_FALSE; - break; - case DATE: { - val = literal.getValue(); - type = HiveParser.TOK_DATELITERAL; - DateFormat df = new SimpleDateFormat("yyyy-MM-dd"); - val = df.format(((Calendar) val).getTime()); - val = "'" + val + "'"; - } - break; - case TIME: - case TIMESTAMP: { - val = literal.getValue(); - type = HiveParser.TOK_TIMESTAMPLITERAL; - DateFormat df = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss"); - val = df.format(((Calendar) val).getTime()); - val = "'" + val + "'"; - } - break; - case NULL: - type = HiveParser.TOK_NULL; - break; - - default: - throw new RuntimeException("Unsupported Type: " + sqlType); - } - - return (ASTNode) ParseDriver.adaptor.create(type, String.valueOf(val)); - } - - ASTNode curr; - - ASTNode node() { - return curr; - } - - ASTBuilder add(int tokenType, String text) { - ParseDriver.adaptor.addChild(curr, createAST(tokenType, text)); - return this; - } - - ASTBuilder add(ASTBuilder b) { - ParseDriver.adaptor.addChild(curr, b.curr); - return this; - } - - ASTBuilder add(ASTNode n) { - if (n != null) { - ParseDriver.adaptor.addChild(curr, n); - } - return this; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java deleted file mode 100644 index a217d70..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java +++ /dev/null @@ -1,670 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
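The literal translation above attaches Hive's textual type suffixes: Y, S and L for the integer types when useTypeQualInLiteral is set, and D and BD unconditionally for DOUBLE and DECIMAL. A sketch of just that suffix table, switching on a plain type-name string instead of SqlTypeName:

    public class LiteralSuffix {
      static String qualified(Object value, String sqlTypeName) {
        if ("TINYINT".equals(sqlTypeName))  return value + "Y";
        if ("SMALLINT".equals(sqlTypeName)) return value + "S";
        if ("BIGINT".equals(sqlTypeName))   return value + "L";
        if ("DOUBLE".equals(sqlTypeName))   return value + "D";
        if ("DECIMAL".equals(sqlTypeName))  return value + "BD";
        return String.valueOf(value);       // INT, FLOAT, ... carry no suffix
      }

      public static void main(String[] args) {
        System.out.println(qualified(127, "TINYINT"));  // 127Y
        System.out.println(qualified(10, "BIGINT"));    // 10L
      }
    }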
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.translator; - -import java.math.BigDecimal; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import net.hydromatic.optiq.util.BitSets; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException; -import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter.HiveToken; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.ParseDriver; -import org.eigenbase.rel.AggregateCall; -import org.eigenbase.rel.AggregateRelBase; -import org.eigenbase.rel.FilterRelBase; -import org.eigenbase.rel.JoinRelBase; -import org.eigenbase.rel.ProjectRelBase; -import org.eigenbase.rel.RelFieldCollation; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.RelVisitor; -import org.eigenbase.rel.SortRel; -import org.eigenbase.rel.TableAccessRelBase; -import org.eigenbase.rel.UnionRelBase; -import org.eigenbase.rel.rules.SemiJoinRel; -import org.eigenbase.reltype.RelDataTypeField; -import org.eigenbase.rex.RexCall; -import org.eigenbase.rex.RexFieldAccess; -import org.eigenbase.rex.RexFieldCollation; -import org.eigenbase.rex.RexInputRef; -import org.eigenbase.rex.RexLiteral; -import org.eigenbase.rex.RexNode; -import org.eigenbase.rex.RexOver; -import org.eigenbase.rex.RexVisitorImpl; -import org.eigenbase.rex.RexWindow; -import org.eigenbase.rex.RexWindowBound; -import org.eigenbase.sql.SqlKind; -import org.eigenbase.sql.SqlOperator; -import org.eigenbase.sql.type.SqlTypeName; - -import com.google.common.collect.Iterables; - -public class ASTConverter { - private static final Log LOG = LogFactory.getLog(ASTConverter.class); - - private RelNode root; - private HiveAST hiveAST; - private RelNode from; - private FilterRelBase where; - private AggregateRelBase groupBy; - private FilterRelBase having; - private ProjectRelBase select; - private SortRel order; - private SortRel limit; - - private Schema schema; - - private long derivedTableCount; - - ASTConverter(RelNode root, long dtCounterInitVal) { - this.root = root; - hiveAST = new HiveAST(); - this.derivedTableCount = dtCounterInitVal; - } - - public static ASTNode convert(final RelNode relNode, List resultSchema) - throws OptiqSemanticException { - RelNode root = PlanModifierForASTConv.convertOpTree(relNode, resultSchema); - ASTConverter c = new ASTConverter(root, 0); - return c.convert(); - } - - private ASTNode convert() { - /* - * 1. Walk RelNode Graph; note from, where, gBy.. nodes. - */ - new QBVisitor().go(root); - - /* - * 2. convert from node. - */ - QueryBlockInfo qb = convertSource(from); - schema = qb.schema; - hiveAST.from = ASTBuilder.construct(HiveParser.TOK_FROM, "TOK_FROM").add(qb.ast).node(); - - /* - * 3. convert filterNode - */ - if (where != null) { - ASTNode cond = where.getCondition().accept(new RexVisitor(schema)); - hiveAST.where = ASTBuilder.where(cond); - } - - /* - * 4. 
GBy - */ - if (groupBy != null) { - ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_GROUPBY, "TOK_GROUPBY"); - for (int i : BitSets.toIter(groupBy.getGroupSet())) { - RexInputRef iRef = new RexInputRef(i, groupBy.getCluster().getTypeFactory() - .createSqlType(SqlTypeName.ANY)); - b.add(iRef.accept(new RexVisitor(schema))); - } - - if (!groupBy.getGroupSet().isEmpty()) - hiveAST.groupBy = b.node(); - schema = new Schema(schema, groupBy); - } - - /* - * 5. Having - */ - if (having != null) { - ASTNode cond = having.getCondition().accept(new RexVisitor(schema)); - hiveAST.having = ASTBuilder.having(cond); - } - - /* - * 6. Project - */ - ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_SELECT, "TOK_SELECT"); - - if (select.getChildExps().isEmpty()) { - RexLiteral r = select.getCluster().getRexBuilder().makeExactLiteral(new BigDecimal(1)); - ASTNode selectExpr = ASTBuilder.selectExpr(ASTBuilder.literal(r), "1"); - b.add(selectExpr); - } else { - int i = 0; - - for (RexNode r : select.getChildExps()) { - ASTNode selectExpr = ASTBuilder.selectExpr(r.accept( - new RexVisitor(schema, r instanceof RexLiteral)), - select.getRowType().getFieldNames().get(i++)); - b.add(selectExpr); - } - } - hiveAST.select = b.node(); - - /* - * 7. Order Use in Order By from the block above. RelNode has no pointer to - * parent hence we need to go top down; but OB at each block really belong - * to its src/from. Hence the need to pass in sortRel for each block from - * its parent. - */ - convertOBToASTNode((HiveSortRel) order); - - // 8. Limit - convertLimitToASTNode((HiveSortRel) limit); - - return hiveAST.getAST(); - } - - private void convertLimitToASTNode(HiveSortRel limit) { - if (limit != null) { - HiveSortRel hiveLimit = (HiveSortRel) limit; - RexNode limitExpr = hiveLimit.getFetchExpr(); - if (limitExpr != null) { - Object val = ((RexLiteral) limitExpr).getValue2(); - hiveAST.limit = ASTBuilder.limit(val); - } - } - } - - private void convertOBToASTNode(HiveSortRel order) { - if (order != null) { - HiveSortRel hiveSort = (HiveSortRel) order; - if (!hiveSort.getCollation().getFieldCollations().isEmpty()) { - // 1 Add order by token - ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); - - schema = new Schema((HiveSortRel) hiveSort); - Map obRefToCallMap = hiveSort.getInputRefToCallMap(); - RexNode obExpr; - ASTNode astCol; - for (RelFieldCollation c : hiveSort.getCollation().getFieldCollations()) { - - // 2 Add Direction token - ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder - .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder - .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); - - // 3 Convert OB expr (OB Expr is usually an input ref except for top - // level OB; top level OB will have RexCall kept in a map.) - obExpr = null; - if (obRefToCallMap != null) - obExpr = obRefToCallMap.get(c.getFieldIndex()); - - if (obExpr != null) { - astCol = obExpr.accept(new RexVisitor(schema)); - } else { - ColumnInfo cI = schema.get(c.getFieldIndex()); - /* - * The RowResolver setup for Select drops Table associations. So - * setup ASTNode on unqualified name. 
- */ - astCol = ASTBuilder.unqualifiedName(cI.column); - } - - // 4 buildup the ob expr AST - directionAST.addChild(astCol); - orderAst.addChild(directionAST); - } - hiveAST.order = orderAst; - } - } - } - - private Schema getRowSchema(String tblAlias) { - return new Schema(select, tblAlias); - } - - private QueryBlockInfo convertSource(RelNode r) { - Schema s; - ASTNode ast; - - if (r instanceof TableAccessRelBase) { - TableAccessRelBase f = (TableAccessRelBase) r; - s = new Schema(f); - ast = ASTBuilder.table(f); - } else if (r instanceof JoinRelBase) { - JoinRelBase join = (JoinRelBase) r; - QueryBlockInfo left = convertSource(join.getLeft()); - QueryBlockInfo right = convertSource(join.getRight()); - s = new Schema(left.schema, right.schema); - ASTNode cond = join.getCondition().accept(new RexVisitor(s)); - boolean semiJoin = join instanceof SemiJoinRel; - ast = ASTBuilder.join(left.ast, right.ast, join.getJoinType(), cond, semiJoin); - if (semiJoin) - s = left.schema; - } else if (r instanceof UnionRelBase) { - RelNode leftInput = ((UnionRelBase) r).getInput(0); - RelNode rightInput = ((UnionRelBase) r).getInput(1); - - ASTConverter leftConv = new ASTConverter(leftInput, this.derivedTableCount); - ASTConverter rightConv = new ASTConverter(rightInput, this.derivedTableCount); - ASTNode leftAST = leftConv.convert(); - ASTNode rightAST = rightConv.convert(); - - ASTNode unionAST = getUnionAllAST(leftAST, rightAST); - - String sqAlias = nextAlias(); - ast = ASTBuilder.subQuery(unionAST, sqAlias); - s = new Schema((UnionRelBase) r, sqAlias); - } else { - ASTConverter src = new ASTConverter(r, this.derivedTableCount); - ASTNode srcAST = src.convert(); - String sqAlias = nextAlias(); - s = src.getRowSchema(sqAlias); - ast = ASTBuilder.subQuery(srcAST, sqAlias); - } - return new QueryBlockInfo(s, ast); - } - - class QBVisitor extends RelVisitor { - - public void handle(FilterRelBase filter) { - RelNode child = filter.getChild(); - if (child instanceof AggregateRelBase && !((AggregateRelBase) child).getGroupSet().isEmpty()) { - ASTConverter.this.having = filter; - } else { - ASTConverter.this.where = filter; - } - } - - public void handle(ProjectRelBase project) { - if (ASTConverter.this.select == null) { - ASTConverter.this.select = project; - } else { - ASTConverter.this.from = project; - } - } - - @Override - public void visit(RelNode node, int ordinal, RelNode parent) { - - if (node instanceof TableAccessRelBase) { - ASTConverter.this.from = node; - } else if (node instanceof FilterRelBase) { - handle((FilterRelBase) node); - } else if (node instanceof ProjectRelBase) { - handle((ProjectRelBase) node); - } else if (node instanceof JoinRelBase) { - ASTConverter.this.from = node; - } else if (node instanceof UnionRelBase) { - ASTConverter.this.from = node; - } else if (node instanceof AggregateRelBase) { - ASTConverter.this.groupBy = (AggregateRelBase) node; - } else if (node instanceof SortRel) { - if (ASTConverter.this.select != null) { - ASTConverter.this.from = node; - } else { - SortRel hiveSortRel = (SortRel) node; - if (hiveSortRel.getCollation().getFieldCollations().isEmpty()) - ASTConverter.this.limit = hiveSortRel; - else - ASTConverter.this.order = hiveSortRel; - } - } - /* - * once the source node is reached; stop traversal for this QB - */ - if (ASTConverter.this.from == null) { - node.childrenAccept(this); - } - } - - } - - static class RexVisitor extends RexVisitorImpl { - - private final Schema schema; - private boolean useTypeQualInLiteral; - - protected RexVisitor(Schema 
schema) { - this(schema, false); - } - - protected RexVisitor(Schema schema, boolean useTypeQualInLiteral) { - super(true); - this.schema = schema; - this.useTypeQualInLiteral = useTypeQualInLiteral; - } - - @Override - public ASTNode visitFieldAccess(RexFieldAccess fieldAccess) { - return ASTBuilder.construct(HiveParser.DOT, ".").add(super.visitFieldAccess(fieldAccess)) - .add(HiveParser.Identifier, fieldAccess.getField().getName()).node(); - } - - @Override - public ASTNode visitInputRef(RexInputRef inputRef) { - ColumnInfo cI = schema.get(inputRef.getIndex()); - if (cI.agg != null) { - return (ASTNode) ParseDriver.adaptor.dupTree(cI.agg); - } - - if (cI.table == null || cI.table.isEmpty()) - return ASTBuilder.unqualifiedName(cI.column); - else - return ASTBuilder.qualifiedName(cI.table, cI.column); - - } - - @Override - public ASTNode visitLiteral(RexLiteral literal) { - return ASTBuilder.literal(literal, useTypeQualInLiteral); - } - - private ASTNode getPSpecAST(RexWindow window) { - ASTNode pSpecAst = null; - - ASTNode dByAst = null; - if (window.partitionKeys != null && !window.partitionKeys.isEmpty()) { - dByAst = ASTBuilder.createAST(HiveParser.TOK_DISTRIBUTEBY, "TOK_DISTRIBUTEBY"); - for (RexNode pk : window.partitionKeys) { - ASTNode astCol = pk.accept(this); - dByAst.addChild(astCol); - } - } - - ASTNode oByAst = null; - if (window.orderKeys != null && !window.orderKeys.isEmpty()) { - oByAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); - for (RexFieldCollation ok : window.orderKeys) { - ASTNode astNode = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder - .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder - .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); - ASTNode astCol = ok.left.accept(this); - astNode.addChild(astCol); - oByAst.addChild(astNode); - } - } - - if (dByAst != null || oByAst != null) { - pSpecAst = ASTBuilder.createAST(HiveParser.TOK_PARTITIONINGSPEC, "TOK_PARTITIONINGSPEC"); - if (dByAst != null) - pSpecAst.addChild(dByAst); - if (oByAst != null) - pSpecAst.addChild(oByAst); - } - - return pSpecAst; - } - - private ASTNode getWindowBound(RexWindowBound wb) { - ASTNode wbAST = null; - - if (wb.isCurrentRow()) { - wbAST = ASTBuilder.createAST(HiveParser.KW_CURRENT, "CURRENT"); - } else { - if (wb.isPreceding()) - wbAST = ASTBuilder.createAST(HiveParser.KW_PRECEDING, "PRECEDING"); - else - wbAST = ASTBuilder.createAST(HiveParser.KW_FOLLOWING, "FOLLOWING"); - if (wb.isUnbounded()) { - wbAST.addChild(ASTBuilder.createAST(HiveParser.KW_UNBOUNDED, "UNBOUNDED")); - } else { - ASTNode offset = wb.getOffset().accept(this); - wbAST.addChild(offset); - } - } - - return wbAST; - } - - private ASTNode getWindowRangeAST(RexWindow window) { - ASTNode wRangeAst = null; - - ASTNode startAST = null; - RexWindowBound ub = window.getUpperBound(); - if (ub != null) { - startAST = getWindowBound(ub); - } - - ASTNode endAST = null; - RexWindowBound lb = window.getLowerBound(); - if (lb != null) { - endAST = getWindowBound(lb); - } - - if (startAST != null || endAST != null) { - // NOTE: in Hive AST Rows->Range(Physical) & Range -> Values (logical) - if (window.isRows()) - wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWRANGE, "TOK_WINDOWRANGE"); - else - wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWVALUES, "TOK_WINDOWVALUES"); - if (startAST != null) - wRangeAst.addChild(startAST); - if (endAST != null) - wRangeAst.addChild(endAST); - } - - return wRangeAst; - } - - @Override 
- public ASTNode visitOver(RexOver over) { - if (!deep) { - return null; - } - - // 1. Translate the UDAF - final ASTNode wUDAFAst = visitCall(over); - - // 2. Add TOK_WINDOW as child of UDAF - ASTNode wSpec = ASTBuilder.createAST(HiveParser.TOK_WINDOWSPEC, "TOK_WINDOWSPEC"); - wUDAFAst.addChild(wSpec); - - // 3. Add Part Spec & Range Spec as child of TOK_WINDOW - final RexWindow window = over.getWindow(); - final ASTNode wPSpecAst = getPSpecAST(window); - final ASTNode wRangeAst = getWindowRangeAST(window); - if (wPSpecAst != null) - wSpec.addChild(wPSpecAst); - if (wRangeAst != null) - wSpec.addChild(wRangeAst); - - return wUDAFAst; - } - - @Override - public ASTNode visitCall(RexCall call) { - if (!deep) { - return null; - } - - SqlOperator op = call.getOperator(); - List astNodeLst = new LinkedList(); - if (op.kind == SqlKind.CAST) { - HiveToken ht = TypeConverter.hiveToken(call.getType()); - ASTBuilder astBldr = ASTBuilder.construct(ht.type, ht.text); - if (ht.args != null) { - for (String castArg : ht.args) - astBldr.add(HiveParser.Identifier, castArg); - } - astNodeLst.add(astBldr.node()); - } - - for (RexNode operand : call.operands) { - astNodeLst.add(operand.accept(this)); - } - - if (isFlat(call)) - return SqlFunctionConverter.buildAST(op, astNodeLst, 0); - else - return SqlFunctionConverter.buildAST(op, astNodeLst); - } - } - - static class QueryBlockInfo { - Schema schema; - ASTNode ast; - - public QueryBlockInfo(Schema schema, ASTNode ast) { - super(); - this.schema = schema; - this.ast = ast; - } - } - - /* - * represents the schema exposed by a QueryBlock. - */ - static class Schema extends ArrayList { - - private static final long serialVersionUID = 1L; - - Schema(TableAccessRelBase scan) { - String tabName = ((RelOptHiveTable) scan.getTable()).getTableAlias(); - for (RelDataTypeField field : scan.getRowType().getFieldList()) { - add(new ColumnInfo(tabName, field.getName())); - } - } - - Schema(ProjectRelBase select, String alias) { - for (RelDataTypeField field : select.getRowType().getFieldList()) { - add(new ColumnInfo(alias, field.getName())); - } - } - - Schema(UnionRelBase unionRel, String alias) { - for (RelDataTypeField field : unionRel.getRowType().getFieldList()) { - add(new ColumnInfo(alias, field.getName())); - } - } - - @SuppressWarnings("unchecked") - Schema(Schema left, Schema right) { - for (ColumnInfo cI : Iterables.concat(left, right)) { - add(cI); - } - } - - Schema(Schema src, AggregateRelBase gBy) { - for (int i : BitSets.toIter(gBy.getGroupSet())) { - ColumnInfo cI = src.get(i); - add(cI); - } - List aggs = gBy.getAggCallList(); - for (AggregateCall agg : aggs) { - int argCount = agg.getArgList().size(); - ASTBuilder b = agg.isDistinct() ? ASTBuilder.construct(HiveParser.TOK_FUNCTIONDI, - "TOK_FUNCTIONDI") : argCount == 0 ? ASTBuilder.construct(HiveParser.TOK_FUNCTIONSTAR, - "TOK_FUNCTIONSTAR") : ASTBuilder.construct(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); - b.add(HiveParser.Identifier, agg.getAggregation().getName()); - for (int i : agg.getArgList()) { - RexInputRef iRef = new RexInputRef(i, gBy.getCluster().getTypeFactory() - .createSqlType(SqlTypeName.ANY)); - b.add(iRef.accept(new RexVisitor(src))); - } - add(new ColumnInfo(null, b.node())); - } - } - - /** - * Assumption:
- * 1. ProjectRel will always be a child of SortRel.
- * 2. In Optiq every projection in ProjectRelBase is uniquely named - * (unambiguous) without using a table qualifier (table name).
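- * Illustrative example: for "select a + b as s from t order by s", the SortRel's - * child ProjectRelBase exposes the single unique output name "s", so this schema - * is simply [ColumnInfo(null, "s")] with no table qualifier.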
- * - * @param order - * Hive Sort Rel Node - * @return Schema - */ - public Schema(HiveSortRel order) { - ProjectRelBase select = (ProjectRelBase) order.getChild(); - for (String projName : select.getRowType().getFieldNames()) { - add(new ColumnInfo(null, projName)); - } - } - } - - /* - * represents Column information exposed by a QueryBlock. - */ - static class ColumnInfo { - String table; - String column; - ASTNode agg; - - ColumnInfo(String table, String column) { - super(); - this.table = table; - this.column = column; - } - - ColumnInfo(String table, ASTNode agg) { - super(); - this.table = table; - this.agg = agg; - } - - ColumnInfo(String alias, ColumnInfo srcCol) { - this.table = alias; - this.column = srcCol.column; - this.agg = srcCol.agg; - } - } - - private String nextAlias() { - String tabAlias = String.format("$hdt$_%d", derivedTableCount); - derivedTableCount++; - return tabAlias; - } - - static class HiveAST { - - ASTNode from; - ASTNode where; - ASTNode groupBy; - ASTNode having; - ASTNode select; - ASTNode order; - ASTNode limit; - - public ASTNode getAST() { - ASTBuilder b = ASTBuilder - .construct(HiveParser.TOK_QUERY, "TOK_QUERY") - .add(from) - .add( - ASTBuilder.construct(HiveParser.TOK_INSERT, "TOK_INSERT").add(ASTBuilder.destNode()) - .add(select).add(where).add(groupBy).add(having).add(order).add(limit)); - return b.node(); - } - } - - public ASTNode getUnionAllAST(ASTNode leftAST, ASTNode rightAST) { - - ASTNode unionTokAST = ASTBuilder.construct(HiveParser.TOK_UNION, "TOK_UNION").add(leftAST) - .add(rightAST).node(); - - return unionTokAST; - } - - public static boolean isFlat(RexCall call) { - boolean flat = false; - if (call.operands != null && call.operands.size() > 2) { - SqlOperator op = call.getOperator(); - if (op.getKind() == SqlKind.AND || op.getKind() == SqlKind.OR) { - flat = true; - } - } - - return flat; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java deleted file mode 100644 index ad34e1e..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java +++ /dev/null @@ -1,168 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.translator; - -import java.sql.Date; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.LinkedList; -import java.util.List; - -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.eigenbase.reltype.RelDataType; -import org.eigenbase.reltype.RelDataTypeField; -import org.eigenbase.rex.RexCall; -import org.eigenbase.rex.RexInputRef; -import org.eigenbase.rex.RexLiteral; -import org.eigenbase.rex.RexNode; -import org.eigenbase.rex.RexVisitorImpl; - -/* - * convert a RexNode to an ExprNodeDesc - */ -public class ExprNodeConverter extends RexVisitorImpl { - - RelDataType rType; - String tabAlias; - boolean partitioningExpr; - - public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr) { - super(true); - /* - * hb: 6/25/14 for now we only support expressions that only contain - * partition cols. there is no use case for supporting generic expressions. - * for supporting generic exprs., we need to give the converter information - * on whether a column is a partition column or not, whether a column is a - * virtual column or not. - */ - assert partitioningExpr == true; - this.tabAlias = tabAlias; - this.rType = rType; - this.partitioningExpr = partitioningExpr; - } - - @Override - public ExprNodeDesc visitInputRef(RexInputRef inputRef) { - RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex()); - return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), f.getName(), tabAlias, - partitioningExpr); - } - - @Override - public ExprNodeDesc visitCall(RexCall call) { - ExprNodeGenericFuncDesc gfDesc = null; - - if (!deep) { - return null; - } - - List args = new LinkedList(); - - for (RexNode operand : call.operands) { - args.add(operand.accept(this)); - } - - // If Expr is flat (and[p,q,r,s] or[p,q,r,s]) then recursively build the - // exprnode - if (ASTConverter.isFlat(call)) { - ArrayList tmpExprArgs = new ArrayList(); - tmpExprArgs.addAll(args.subList(0, 2)); - gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), - SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2), tmpExprArgs); - for (int i = 2; i < call.operands.size(); i++) { - tmpExprArgs = new ArrayList(); - tmpExprArgs.add(gfDesc); - tmpExprArgs.add(args.get(i)); - gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), - SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2), tmpExprArgs); - } - } else { - GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF( - call.getOperator(), call.getType(), args.size()); - if (hiveUdf == null) { - throw new RuntimeException("Cannot find UDF for " + call.getType() + " " + call.getOperator() - + "[" + call.getOperator().getKind() + "]/" + args.size()); - } - gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), hiveUdf, args); - } - - return gfDesc; - } - - @Override - public ExprNodeDesc visitLiteral(RexLiteral literal) { - RelDataType lType = literal.getType(); - - switch (literal.getType().getSqlTypeName()) { - case BOOLEAN: - 
return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral - .booleanValue(literal))); - case TINYINT: - return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal - .getValue3()).byteValue())); - case SMALLINT: - return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, - Short.valueOf(((Number) literal.getValue3()).shortValue())); - case INTEGER: - return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, - Integer.valueOf(((Number) literal.getValue3()).intValue())); - case BIGINT: - return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal - .getValue3()).longValue())); - case FLOAT: - return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, - Float.valueOf(((Number) literal.getValue3()).floatValue())); - case DOUBLE: - return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, - Double.valueOf(((Number) literal.getValue3()).doubleValue())); - case DATE: - return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, - new Date(((Calendar)literal.getValue()).getTimeInMillis())); - case TIMESTAMP: { - Object value = literal.getValue3(); - if (value instanceof Long) { - value = new Timestamp((Long)value); - } - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, value); - } - case BINARY: - return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3()); - case DECIMAL: - return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), - lType.getScale()), literal.getValue3()); - case VARCHAR: - return new ExprNodeConstantDesc(TypeInfoFactory.getVarcharTypeInfo(lType.getPrecision()), - new HiveVarchar((String) literal.getValue3(), lType.getPrecision())); - case CHAR: - return new ExprNodeConstantDesc(TypeInfoFactory.getCharTypeInfo(lType.getPrecision()), - new HiveChar((String) literal.getValue3(), lType.getPrecision())); - case OTHER: - default: - return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); - } - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java deleted file mode 100644 index 89c57b9..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java +++ /dev/null @@ -1,316 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.translator; - -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.parse.TypeCheckCtx; -import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Stack; - -/** - * JoinCondTypeCheckProcFactory is used by Optiq planner(CBO) to generate Join Conditions from Join Condition AST. - * Reasons for sub class: - * 1. Additional restrictions on what is supported in Join Conditions - * 2. Column handling is different - * 3. Join Condn expr has two input RR as opposed to one. - */ - -/** - * TODO:
- * 1. Could we use a combined RR instead of a list of RRs?
- * 2. Use Column Processing from TypeCheckProcFactory
- * 3. Why not use GB expr ? - */ -public class JoinCondTypeCheckProcFactory extends TypeCheckProcFactory { - - public static Map genExprNode(ASTNode expr, TypeCheckCtx tcCtx) - throws SemanticException { - return TypeCheckProcFactory.genExprNode(expr, tcCtx, new JoinCondTypeCheckProcFactory()); - } - - /** - * Processor for table columns. - */ - public static class JoinCondColumnExprProcessor extends ColumnExprProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ASTNode expr = (ASTNode) nd; - ASTNode parent = stack.size() > 1 ? (ASTNode) stack.get(stack.size() - 2) : null; - - if (expr.getType() != HiveParser.TOK_TABLE_OR_COL) { - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr), expr); - return null; - } - - assert (expr.getChildCount() == 1); - String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()); - - boolean qualifiedAccess = (parent != null && parent.getType() == HiveParser.DOT); - - ColumnInfo colInfo = null; - if (!qualifiedAccess) { - colInfo = getColInfo(ctx, null, tableOrCol, expr); - // It's a column. - return new ExprNodeColumnDesc(colInfo); - } else if (hasTableAlias(ctx, tableOrCol, expr)) { - return null; - } else { - // Qualified column access for which table was not found - throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(expr)); - } - } - - private static boolean hasTableAlias(JoinTypeCheckCtx ctx, String tabName, ASTNode expr) - throws SemanticException { - int tblAliasCnt = 0; - for (RowResolver rr : ctx.getInputRRList()) { - if (rr.hasTableAlias(tabName)) - tblAliasCnt++; - } - - if (tblAliasCnt > 1) { - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); - } - - return (tblAliasCnt == 1) ? true : false; - } - - private static ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, String colAlias, - ASTNode expr) throws SemanticException { - ColumnInfo tmp; - ColumnInfo cInfoToRet = null; - - for (RowResolver rr : ctx.getInputRRList()) { - tmp = rr.get(tabName, colAlias); - if (tmp != null) { - if (cInfoToRet != null) { - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); - } - cInfoToRet = tmp; - } - } - - return cInfoToRet; - } - } - - /** - * Factory method to get ColumnExprProcessor. - * - * @return ColumnExprProcessor. - */ - @Override - public ColumnExprProcessor getColumnExprProcessor() { - return new JoinCondColumnExprProcessor(); - } - - /** - * The default processor for typechecking. - */ - public static class JoinCondDefaultExprProcessor extends DefaultExprProcessor { - @Override - protected List getReferenceableColumnAliases(TypeCheckCtx ctx) { - JoinTypeCheckCtx jCtx = (JoinTypeCheckCtx) ctx; - List possibleColumnNames = new ArrayList(); - for (RowResolver rr : jCtx.getInputRRList()) { - possibleColumnNames.addAll(rr.getReferenceableColumnAliases(null, -1)); - } - - return possibleColumnNames; - } - - @Override - protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, - Object... nodeOutputs) throws SemanticException { - String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0) - .getText()); - // NOTE: tableAlias must be a valid non-ambiguous table alias, - // because we've checked that in TOK_TABLE_OR_COL's process method. 
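- // Illustrative sketch, not part of the original file: resolving "t1.key" here, - // assuming hypothetical join inputs aliased t1 and t2, amounts to - //   for (RowResolver rr : ctx.getInputRRList()) { ci = rr.get("t1", "key"); } - // where exactly one non-null hit yields the ColumnInfo; two hits would already - // have been rejected as an ambiguous join condition.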
- ColumnInfo colInfo = getColInfo((JoinTypeCheckCtx) ctx, tableAlias, - ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString(), expr); - - if (colInfo == null) { - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); - return null; - } - return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), tableAlias, - colInfo.getIsVirtualCol()); - } - - private static ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, String colAlias, - ASTNode expr) throws SemanticException { - ColumnInfo tmp; - ColumnInfo cInfoToRet = null; - - for (RowResolver rr : ctx.getInputRRList()) { - tmp = rr.get(tabName, colAlias); - if (tmp != null) { - if (cInfoToRet != null) { - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); - } - cInfoToRet = tmp; - } - } - - return cInfoToRet; - } - - @Override - protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi, - List children, GenericUDF genericUDF) throws SemanticException { - super.validateUDF(expr, isFunction, ctx, fi, children, genericUDF); - - JoinTypeCheckCtx jCtx = (JoinTypeCheckCtx) ctx; - - // Join Condition can not contain disjunctions - if (genericUDF instanceof GenericUDFOPOr) { - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3.getMsg(expr)); - } - - // Non Conjunctive elements have further limitations in Join conditions - if (!(genericUDF instanceof GenericUDFOPAnd)) { - // Non Comparison UDF other than 'and' can not use inputs from both side - if (!(genericUDF instanceof GenericUDFBaseCompare)) { - if (genericUDFargsRefersToBothInput(genericUDF, children, jCtx.getInputRRList())) { - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); - } - } else if (genericUDF instanceof GenericUDFBaseCompare) { - // Comparisons of non literals LHS/RHS can not refer to inputs from - // both sides - if (children.size() == 2 && !(children.get(0) instanceof ExprNodeConstantDesc) - && !(children.get(1) instanceof ExprNodeConstantDesc)) { - if (comparisonUDFargsRefersToBothInput((GenericUDFBaseCompare) genericUDF, children, - jCtx.getInputRRList())) { - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); - } - } - } - } - } - - private static boolean genericUDFargsRefersToBothInput(GenericUDF udf, - List children, List inputRRList) { - boolean argsRefersToBothInput = false; - - Map hasCodeToColDescMap = new HashMap(); - for (ExprNodeDesc child : children) { - ExprNodeDescUtils.getExprNodeColumnDesc(child, hasCodeToColDescMap); - } - Set inputRef = getInputRef(hasCodeToColDescMap.values(), inputRRList); - - if (inputRef.size() > 1) - argsRefersToBothInput = true; - - return argsRefersToBothInput; - } - - private static boolean comparisonUDFargsRefersToBothInput(GenericUDFBaseCompare comparisonUDF, - List children, List inputRRList) { - boolean argsRefersToBothInput = false; - - Map lhsHashCodeToColDescMap = new HashMap(); - Map rhsHashCodeToColDescMap = new HashMap(); - ExprNodeDescUtils.getExprNodeColumnDesc(children.get(0), lhsHashCodeToColDescMap); - ExprNodeDescUtils.getExprNodeColumnDesc(children.get(1), rhsHashCodeToColDescMap); - Set lhsInputRef = getInputRef(lhsHashCodeToColDescMap.values(), inputRRList); - Set rhsInputRef = getInputRef(rhsHashCodeToColDescMap.values(), inputRRList); - - if (lhsInputRef.size() > 1 || rhsInputRef.size() > 1) - argsRefersToBothInput = true; - - return argsRefersToBothInput; - } - - private static Set getInputRef(Collection colDescSet, - List inputRRList) { - 
String tableAlias; - RowResolver inputRR; - Set inputLineage = new HashSet(); - - for (ExprNodeDesc col : colDescSet) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) col; - tableAlias = colDesc.getTabAlias(); - - for (int i = 0; i < inputRRList.size(); i++) { - inputRR = inputRRList.get(i); - - // If table Alias is present check if InputRR has that table and then - // check for internal name - // else if table alias is null then check with internal name in all - // inputRR. - if (tableAlias != null) { - if (inputRR.hasTableAlias(tableAlias)) { - if (inputRR.getInvRslvMap().containsKey(colDesc.getColumn())) { - inputLineage.add(i); - } - } - } else { - if (inputRR.getInvRslvMap().containsKey(colDesc.getColumn())) { - inputLineage.add(i); - } - } - } - } - - return inputLineage; - } - } - - /** - * Factory method to get DefaultExprProcessor. - * - * @return DefaultExprProcessor. - */ - @Override - public DefaultExprProcessor getDefaultExprProcessor() { - return new JoinCondDefaultExprProcessor(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinTypeCheckCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinTypeCheckCtx.java deleted file mode 100644 index fdee66b..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinTypeCheckCtx.java +++ /dev/null @@ -1,73 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.translator; - -import java.util.List; - -import org.apache.hadoop.hive.ql.parse.JoinType; -import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.parse.TypeCheckCtx; - -import com.google.common.collect.ImmutableList; - -/** - * JoinTypeCheckCtx is used by Optiq planner(CBO) to generate Join Conditions from Join Condition AST. - * Reasons for sub class: - * 1. Join Conditions can not handle: - * a. Stateful Functions - * b. Distinct - * c. '*' expr - * d. '.*' expr - * e. Windowing expr - * f. Complex type member access - * g. Array Index Access - * h. Sub query - * i. GB expr elimination - * 2. Join Condn expr has two input RR as opposed to one. - */ - -/** - * TODO:
- * 1. Could we use a combined RR instead of a list of RRs?
- * 2. Why not use GB expr ? - */ -public class JoinTypeCheckCtx extends TypeCheckCtx { - private final ImmutableList inputRRLst; - private final boolean outerJoin; - - public JoinTypeCheckCtx(RowResolver leftRR, RowResolver rightRR, JoinType hiveJoinType) - throws SemanticException { - super(RowResolver.getCombinedRR(leftRR, rightRR), false, false, false, false, false, false, - false, false); - this.inputRRLst = ImmutableList.of(leftRR, rightRR); - this.outerJoin = (hiveJoinType == JoinType.LEFTOUTER) || (hiveJoinType == JoinType.RIGHTOUTER) - || (hiveJoinType == JoinType.FULLOUTER); - } - - /** - * @return the inputRR List - */ - public List getInputRRList() { - return inputRRLst; - } - - public boolean isOuterJoin() { - return outerJoin; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java deleted file mode 100644 index 57f030b..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java +++ /dev/null @@ -1,397 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.translator; - -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.eigenbase.rel.AggregateCall; -import org.eigenbase.rel.AggregateRelBase; -import org.eigenbase.rel.Aggregation; -import org.eigenbase.rel.EmptyRel; -import org.eigenbase.rel.FilterRelBase; -import org.eigenbase.rel.JoinRelBase; -import org.eigenbase.rel.OneRowRelBase; -import org.eigenbase.rel.ProjectRelBase; -import org.eigenbase.rel.RelCollationImpl; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.SetOpRel; -import org.eigenbase.rel.SingleRel; -import org.eigenbase.rel.SortRel; -import org.eigenbase.rel.rules.MultiJoinRel; -import org.eigenbase.relopt.RelOptUtil; -import org.eigenbase.relopt.hep.HepRelVertex; -import org.eigenbase.relopt.volcano.RelSubset; -import org.eigenbase.reltype.RelDataType; -import org.eigenbase.reltype.RelDataTypeFactory; -import org.eigenbase.rex.RexNode; -import org.eigenbase.sql.SqlKind; -import org.eigenbase.util.Pair; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; - -public class PlanModifierForASTConv { - private static final Log LOG = LogFactory.getLog(PlanModifierForASTConv.class); - - public static RelNode convertOpTree(RelNode rel, List resultSchema) - throws OptiqSemanticException { - RelNode newTopNode = rel; - if (LOG.isDebugEnabled()) { - LOG.debug("Original plan for PlanModifier\n " + RelOptUtil.toString(newTopNode)); - } - - if (!(newTopNode instanceof ProjectRelBase) && !(newTopNode instanceof SortRel)) { - newTopNode = introduceDerivedTable(newTopNode); - if (LOG.isDebugEnabled()) { - LOG.debug("Plan after top-level introduceDerivedTable\n " - + RelOptUtil.toString(newTopNode)); - } - } - - convertOpTree(newTopNode, (RelNode) null); - if (LOG.isDebugEnabled()) { - LOG.debug("Plan after nested convertOpTree\n " + RelOptUtil.toString(newTopNode)); - } - - Pair topSelparentPair = HiveOptiqUtil.getTopLevelSelect(newTopNode); - fixTopOBSchema(newTopNode, topSelparentPair, resultSchema); - if (LOG.isDebugEnabled()) { - LOG.debug("Plan after fixTopOBSchema\n " + RelOptUtil.toString(newTopNode)); - } - - topSelparentPair = HiveOptiqUtil.getTopLevelSelect(newTopNode); - newTopNode = renameTopLevelSelectInResultSchema(newTopNode, topSelparentPair, resultSchema); - if (LOG.isDebugEnabled()) { - LOG.debug("Final plan after modifier\n " + RelOptUtil.toString(newTopNode)); - } - return newTopNode; - } - - private static void convertOpTree(RelNode rel, RelNode parent) { - - if (rel instanceof EmptyRel) { - throw new RuntimeException("Found Empty Rel"); - } else if (rel instanceof HepRelVertex) { - throw new RuntimeException("Found HepRelVertex"); - } else if (rel instanceof JoinRelBase) { - if (!validJoinParent(rel, parent)) { - introduceDerivedTable(rel, parent); - } - } else if (rel instanceof MultiJoinRel) { - throw new 
RuntimeException("Found MultiJoinRel"); - } else if (rel instanceof OneRowRelBase) { - throw new RuntimeException("Found OneRowRelBase"); - } else if (rel instanceof RelSubset) { - throw new RuntimeException("Found RelSubset"); - } else if (rel instanceof SetOpRel) { - // TODO: Handle more than 2 inputs for setop - if (!validSetopParent(rel, parent)) - introduceDerivedTable(rel, parent); - - SetOpRel setopRel = (SetOpRel) rel; - for (RelNode inputRel : setopRel.getInputs()) { - if (!validSetopChild(inputRel)) { - introduceDerivedTable(inputRel, setopRel); - } - } - } else if (rel instanceof SingleRel) { - if (rel instanceof FilterRelBase) { - if (!validFilterParent(rel, parent)) { - introduceDerivedTable(rel, parent); - } - } else if (rel instanceof HiveSortRel) { - if (!validSortParent(rel, parent)) { - introduceDerivedTable(rel, parent); - } - if (!validSortChild((HiveSortRel) rel)) { - introduceDerivedTable(((HiveSortRel) rel).getChild(), rel); - } - } else if (rel instanceof HiveAggregateRel) { - RelNode newParent = parent; - if (!validGBParent(rel, parent)) { - newParent = introduceDerivedTable(rel, parent); - } - // check if groupby is empty and there is no other cols in aggr - // this should only happen when newParent is constant. - if (isEmptyGrpAggr(rel)) { - replaceEmptyGroupAggr(rel, newParent); - } - } - } - - List childNodes = rel.getInputs(); - if (childNodes != null) { - for (RelNode r : childNodes) { - convertOpTree(r, rel); - } - } - } - - private static void fixTopOBSchema(final RelNode rootRel, - Pair topSelparentPair, List resultSchema) - throws OptiqSemanticException { - if (!(topSelparentPair.getKey() instanceof SortRel) - || !HiveOptiqUtil.orderRelNode(topSelparentPair.getKey())) { - return; - } - HiveSortRel obRel = (HiveSortRel) topSelparentPair.getKey(); - ProjectRelBase obChild = (ProjectRelBase) topSelparentPair.getValue(); - if (obChild.getRowType().getFieldCount() <= resultSchema.size()) { - return; - } - - RelDataType rt = obChild.getRowType(); - @SuppressWarnings({ "unchecked", "rawtypes" }) - Set collationInputRefs = new HashSet( - RelCollationImpl.ordinals(obRel.getCollation())); - ImmutableMap.Builder inputRefToCallMapBldr = ImmutableMap.builder(); - for (int i = resultSchema.size(); i < rt.getFieldCount(); i++) { - if (collationInputRefs.contains(i)) { - inputRefToCallMapBldr.put(i, obChild.getChildExps().get(i)); - } - } - ImmutableMap inputRefToCallMap = inputRefToCallMapBldr.build(); - - if ((obChild.getRowType().getFieldCount() - inputRefToCallMap.size()) != resultSchema.size()) { - LOG.error(generateInvalidSchemaMessage(obChild, resultSchema, inputRefToCallMap.size())); - throw new OptiqSemanticException("Result Schema didn't match Optimized Op Tree Schema"); - } - // This removes order-by only expressions from the projections. 
- HiveProjectRel replacementProjectRel = HiveProjectRel.create(obChild.getChild(), obChild - .getChildExps().subList(0, resultSchema.size()), obChild.getRowType().getFieldNames() - .subList(0, resultSchema.size())); - obRel.replaceInput(0, replacementProjectRel); - obRel.setInputRefToCallMap(inputRefToCallMap); - } - - private static String generateInvalidSchemaMessage(ProjectRelBase topLevelProj, - List resultSchema, int fieldsForOB) { - String errorDesc = "Result Schema didn't match Optiq Optimized Op Tree; schema: "; - for (FieldSchema fs : resultSchema) { - errorDesc += "[" + fs.getName() + ":" + fs.getType() + "], "; - } - errorDesc += " projection fields: "; - for (RexNode exp : topLevelProj.getChildExps()) { - errorDesc += "[" + exp.toString() + ":" + exp.getType() + "], "; - } - if (fieldsForOB != 0) { - errorDesc += fieldsForOB + " fields removed due to ORDER BY "; - } - return errorDesc.substring(0, errorDesc.length() - 2); - } - - private static RelNode renameTopLevelSelectInResultSchema(final RelNode rootRel, - Pair topSelparentPair, List resultSchema) - throws OptiqSemanticException { - RelNode parentOforiginalProjRel = topSelparentPair.getKey(); - HiveProjectRel originalProjRel = (HiveProjectRel) topSelparentPair.getValue(); - - // Assumption: top portion of tree could only be - // (limit)?(OB)?(ProjectRelBase).... - List rootChildExps = originalProjRel.getChildExps(); - if (resultSchema.size() != rootChildExps.size()) { - // Safeguard against potential issues in CBO RowResolver construction. Disable CBO for now. - LOG.error(generateInvalidSchemaMessage(originalProjRel, resultSchema, 0)); - throw new OptiqSemanticException("Result Schema didn't match Optimized Op Tree Schema"); - } - - List newSelAliases = new ArrayList(); - String colAlias; - for (int i = 0; i < rootChildExps.size(); i++) { - colAlias = resultSchema.get(i).getName(); - if (colAlias.startsWith("_")) { - colAlias = colAlias.substring(1); - } - newSelAliases.add(colAlias); - } - - HiveProjectRel replacementProjectRel = HiveProjectRel.create(originalProjRel.getChild(), - originalProjRel.getChildExps(), newSelAliases); - - if (rootRel == originalProjRel) { - return replacementProjectRel; - } else { - parentOforiginalProjRel.replaceInput(0, replacementProjectRel); - return rootRel; - } - } - - private static RelNode introduceDerivedTable(final RelNode rel) { - List projectList = HiveOptiqUtil.getProjsFromBelowAsInputRef(rel); - - HiveProjectRel select = HiveProjectRel.create(rel.getCluster(), rel, projectList, - rel.getRowType(), rel.getCollationList()); - - return select; - } - - private static RelNode introduceDerivedTable(final RelNode rel, RelNode parent) { - int i = 0; - int pos = -1; - List childList = parent.getInputs(); - - for (RelNode child : childList) { - if (child == rel) { - pos = i; - break; - } - i++; - } - - if (pos == -1) { - throw new RuntimeException("Couldn't find child node in parent's inputs"); - } - - RelNode select = introduceDerivedTable(rel); - - parent.replaceInput(pos, select); - - return select; - } - - private static boolean validJoinParent(RelNode joinNode, RelNode parent) { - boolean validParent = true; - - if (parent instanceof JoinRelBase) { - if (((JoinRelBase) parent).getRight() == joinNode) { - validParent = false; - } - } else if (parent instanceof SetOpRel) { - validParent = false; - } - - return validParent; - } - - private static boolean validFilterParent(RelNode filterNode, RelNode parent) { - boolean validParent = true; - - // TOODO: Verify GB having is not a seperate 
filter (if so we shouldn't - // introduce derived table) - if (parent instanceof FilterRelBase || parent instanceof JoinRelBase - || parent instanceof SetOpRel) { - validParent = false; - } - - return validParent; - } - - private static boolean validGBParent(RelNode gbNode, RelNode parent) { - boolean validParent = true; - - // TOODO: Verify GB having is not a seperate filter (if so we shouldn't - // introduce derived table) - if (parent instanceof JoinRelBase || parent instanceof SetOpRel - || parent instanceof AggregateRelBase - || (parent instanceof FilterRelBase && ((AggregateRelBase) gbNode).getGroupSet().isEmpty())) { - validParent = false; - } - - return validParent; - } - - private static boolean validSortParent(RelNode sortNode, RelNode parent) { - boolean validParent = true; - - if (parent != null && !(parent instanceof ProjectRelBase) - && !((parent instanceof SortRel) || HiveOptiqUtil.orderRelNode(parent))) - validParent = false; - - return validParent; - } - - private static boolean validSortChild(HiveSortRel sortNode) { - boolean validChild = true; - RelNode child = sortNode.getChild(); - - if (!(HiveOptiqUtil.limitRelNode(sortNode) && HiveOptiqUtil.orderRelNode(child)) - && !(child instanceof ProjectRelBase)) { - validChild = false; - } - - return validChild; - } - - private static boolean validSetopParent(RelNode setop, RelNode parent) { - boolean validChild = true; - - if (parent != null && !(parent instanceof ProjectRelBase)) { - validChild = false; - } - - return validChild; - } - - private static boolean validSetopChild(RelNode setopChild) { - boolean validChild = true; - - if (!(setopChild instanceof ProjectRelBase)) { - validChild = false; - } - - return validChild; - } - - private static boolean isEmptyGrpAggr(RelNode gbNode) { - // Verify if both groupset and aggrfunction are empty) - AggregateRelBase aggrnode = (AggregateRelBase) gbNode; - if (aggrnode.getGroupSet().isEmpty() && aggrnode.getAggCallList().isEmpty()) { - return true; - } - return false; - } - - private static void replaceEmptyGroupAggr(final RelNode rel, RelNode parent) { - // If this function is called, the parent should only include constant - List exps = parent.getChildExps(); - for (RexNode rexNode : exps) { - if (rexNode.getKind() != SqlKind.LITERAL) { - throw new RuntimeException("We expect " + parent.toString() - + " to contain only constants. However, " + rexNode.toString() + " is " - + rexNode.getKind()); - } - } - HiveAggregateRel oldAggRel = (HiveAggregateRel) rel; - RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory(); - RelDataType longType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, typeFactory); - RelDataType intType = TypeConverter.convert(TypeInfoFactory.intTypeInfo, typeFactory); - // Create the dummy aggregation. - Aggregation countFn = (Aggregation) SqlFunctionConverter.getOptiqAggFn("count", - ImmutableList.of(intType), longType); - // TODO: Using 0 might be wrong; might need to walk down to find the - // proper index of a dummy. 
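- // Illustrative: a plan for "select 1 from t" can reach here as - // Aggregate(groupSet={}, aggCalls=[]) under a constant-only Project; the rewrite - // below replaces it with Aggregate(aggCalls=[count(input #0)]) wrapped in a - // derived table, so the AST converter has a real aggregate to emit.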
- List argList = ImmutableList.of(0); - AggregateCall dummyCall = new AggregateCall(countFn, false, argList, longType, null); - AggregateRelBase newAggRel = oldAggRel.copy(oldAggRel.getTraitSet(), oldAggRel.getChild(), - oldAggRel.getGroupSet(), ImmutableList.of(dummyCall)); - RelNode select = introduceDerivedTable(newAggRel); - parent.replaceInput(0, select); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java deleted file mode 100644 index 6b7aa8b..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java +++ /dev/null @@ -1,426 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.optiq.translator; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; -import java.util.GregorianCalendar; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import net.hydromatic.avatica.ByteString; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.type.Decimal128; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException; -import org.apache.hadoop.hive.ql.parse.ParseUtils; -import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar; -import 
org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.eigenbase.rel.RelNode; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.reltype.RelDataType; -import org.eigenbase.reltype.RelDataTypeFactory; -import org.eigenbase.rex.RexBuilder; -import org.eigenbase.rex.RexCall; -import org.eigenbase.rex.RexNode; -import org.eigenbase.rex.RexUtil; -import org.eigenbase.sql.SqlOperator; -import org.eigenbase.sql.fun.SqlCastFunction; -import org.eigenbase.sql.type.SqlTypeName; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableList.Builder; -import com.google.common.collect.ImmutableMap; - -public class RexNodeConverter { - private static final Log LOG = LogFactory.getLog(RexNodeConverter.class); - - private static class InputCtx { - private final RelDataType optiqInpDataType; - private final ImmutableMap hiveNameToPosMap; - private final RowResolver hiveRR; - private final int offsetInOptiqSchema; - - private InputCtx(RelDataType optiqInpDataType, ImmutableMap hiveNameToPosMap, - RowResolver hiveRR, int offsetInOptiqSchema) { - this.optiqInpDataType = optiqInpDataType; - this.hiveNameToPosMap = hiveNameToPosMap; - this.hiveRR = hiveRR; - this.offsetInOptiqSchema = offsetInOptiqSchema; - } - }; - - private final RelOptCluster cluster; - private final ImmutableList inputCtxs; - private final boolean flattenExpr; - - public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, - ImmutableMap nameToPosMap, int offset, boolean flattenExpr) { - this.cluster = cluster; - this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, null, offset)); - this.flattenExpr = flattenExpr; - } - - public RexNodeConverter(RelOptCluster cluster, List inpCtxLst, boolean flattenExpr) { - this.cluster = cluster; - this.inputCtxs = ImmutableList. builder().addAll(inpCtxLst).build(); - this.flattenExpr = flattenExpr; - } - - public RexNode convert(ExprNodeDesc expr) throws SemanticException { - if (expr instanceof ExprNodeNullDesc) { - return createNullLiteral(expr); - } else if (expr instanceof ExprNodeGenericFuncDesc) { - return convert((ExprNodeGenericFuncDesc) expr); - } else if (expr instanceof ExprNodeConstantDesc) { - return convert((ExprNodeConstantDesc) expr); - } else if (expr instanceof ExprNodeColumnDesc) { - return convert((ExprNodeColumnDesc) expr); - } else if (expr instanceof ExprNodeFieldDesc) { - return convert((ExprNodeFieldDesc) expr); - } else { - throw new RuntimeException("Unsupported Expression"); - } - // TODO: handle ExprNodeColumnListDesc - } - - private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException { - RexNode rexNode = convert(fieldDesc.getDesc()); - if (rexNode instanceof RexCall) { - // regular case of accessing nested field in a column - return cluster.getRexBuilder().makeFieldAccess(rexNode, fieldDesc.getFieldName(), true); - } else { - // This may happen for schema-less tables, where columns are dynamically - // supplied by serdes. 
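- // Illustrative: "s.f" converts cleanly when "s" itself produced a RexCall (for - // instance a struct-returning expression); a schema-less column whose fields are - // supplied dynamically by the serde yields no RexCall and falls through to the - // exception below.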
- throw new OptiqSemanticException("Unexpected rexnode : " - + rexNode.getClass().getCanonicalName()); - } - } - - private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticException { - ExprNodeDesc tmpExprNode; - RexNode tmpRN; - - List childRexNodeLst = new LinkedList(); - Builder argTypeBldr = ImmutableList. builder(); - - // TODO: 1) Expand to other functions as needed 2) What about types other than primitive. - TypeInfo tgtDT = null; - GenericUDF tgtUdf = func.getGenericUDF(); - boolean isNumeric = tgtUdf instanceof GenericUDFBaseNumeric, - isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare; - if (isNumeric) { - tgtDT = func.getTypeInfo(); - - assert func.getChildren().size() == 2; - // TODO: checking 2 children is useless, compare already does that. - } else if (isCompare && (func.getChildren().size() == 2)) { - tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0) - .getTypeInfo(), func.getChildren().get(1).getTypeInfo()); - } - - - for (ExprNodeDesc childExpr : func.getChildren()) { - tmpExprNode = childExpr; - if (tgtDT != null - && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) { - if (isCompare) { - // For compare, we will convert requisite children - tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT); - } else if (isNumeric) { - // For numeric, we'll do minimum necessary cast - if we cast to the type - // of expression, bad things will happen. - GenericUDFBaseNumeric numericUdf = (GenericUDFBaseNumeric)tgtUdf; - PrimitiveTypeInfo minArgType = numericUdf.deriveMinArgumentCast(childExpr, tgtDT); - tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType); - } else { - throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare"); - } - - } - argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory())); - tmpRN = convert(tmpExprNode); - childRexNodeLst.add(tmpRN); - } - - // See if this is an explicit cast. - RexNode expr = null; - RelDataType retType = null; - expr = handleExplicitCast(func, childRexNodeLst); - - if (expr == null) { - // This is not a cast; process the function. 
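- // Illustrative: for "a + b" this path looks up the Optiq SqlOperator matching - // GenericUDFOPPlus (a hypothetical but typical case) and emits - // makeCall(op, [$a, $b]); the explicit-cast path above would instead have - // produced makeAbstractCast(targetType, operand).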
- retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()); - SqlOperator optiqOp = SqlFunctionConverter.getOptiqOperator(func.getFuncText(), - func.getGenericUDF(), argTypeBldr.build(), retType); - expr = cluster.getRexBuilder().makeCall(optiqOp, childRexNodeLst); - } else { - retType = expr.getType(); - } - - // TODO: Cast Function in Optiq have a bug where it infertype on cast throws - // an exception - if (flattenExpr && (expr instanceof RexCall) - && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) { - RexCall call = (RexCall) expr; - expr = cluster.getRexBuilder().makeCall(retType, call.getOperator(), - RexUtil.flatten(call.getOperands(), call.getOperator())); - } - - return expr; - } - - private boolean castExprUsingUDFBridge(GenericUDF gUDF) { - boolean castExpr = false; - if (gUDF != null && gUDF instanceof GenericUDFBridge) { - String udfClassName = ((GenericUDFBridge) gUDF).getUdfClassName(); - if (udfClassName != null) { - int sp = udfClassName.lastIndexOf('.'); - // TODO: add method to UDFBridge to say if it is a cast func - if (sp >= 0 & (sp + 1) < udfClassName.length()) { - udfClassName = udfClassName.substring(sp + 1); - if (udfClassName.equals("UDFToBoolean") || udfClassName.equals("UDFToByte") - || udfClassName.equals("UDFToDouble") || udfClassName.equals("UDFToInteger") - || udfClassName.equals("UDFToLong") || udfClassName.equals("UDFToShort") - || udfClassName.equals("UDFToFloat") || udfClassName.equals("UDFToString")) - castExpr = true; - } - } - } - - return castExpr; - } - - private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List childRexNodeLst) - throws OptiqSemanticException { - RexNode castExpr = null; - - if (childRexNodeLst != null && childRexNodeLst.size() == 1) { - GenericUDF udf = func.getGenericUDF(); - if ((udf instanceof GenericUDFToChar) || (udf instanceof GenericUDFToVarchar) - || (udf instanceof GenericUDFToDecimal) || (udf instanceof GenericUDFToDate) - || (udf instanceof GenericUDFToBinary) || castExprUsingUDFBridge(udf)) { - castExpr = cluster.getRexBuilder().makeAbstractCast( - TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()), - childRexNodeLst.get(0)); - } - } - - return castExpr; - } - - private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException { - InputCtx ctxLookingFor = null; - - if (inputCtxs.size() == 1) { - ctxLookingFor = inputCtxs.get(0); - } else { - String tableAlias = col.getTabAlias(); - String colAlias = col.getColumn(); - int noInp = 0; - for (InputCtx ic : inputCtxs) { - if (tableAlias == null || ic.hiveRR.hasTableAlias(tableAlias)) { - if (ic.hiveRR.getPosition(colAlias) >= 0) { - ctxLookingFor = ic; - noInp++; - } - } - } - - if (noInp > 1) - throw new RuntimeException("Ambigous column mapping"); - } - - return ctxLookingFor; - } - - protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException { - InputCtx ic = getInputCtx(col); - int pos = ic.hiveNameToPosMap.get(col.getColumn()); - return cluster.getRexBuilder().makeInputRef( - ic.optiqInpDataType.getFieldList().get(pos).getType(), pos + ic.offsetInOptiqSchema); - } - - private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE), - MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE); - - protected RexNode convert(ExprNodeConstantDesc literal) throws OptiqSemanticException { - RexBuilder rexBuilder = cluster.getRexBuilder(); - RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory(); - PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo(); - 
RelDataType optiqDataType = TypeConverter.convert(hiveType, dtFactory); - - PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory(); - - ConstantObjectInspector coi = literal.getWritableObjectInspector(); - Object value = ObjectInspectorUtils.copyToStandardJavaObject(coi.getWritableConstantValue(), - coi); - - RexNode optiqLiteral = null; - // TODO: Verify if we need to use ConstantObjectInspector to unwrap data - switch (hiveTypeCategory) { - case BOOLEAN: - optiqLiteral = rexBuilder.makeLiteral(((Boolean) value).booleanValue()); - break; - case BYTE: - byte[] byteArray = new byte[] { (Byte) value }; - ByteString bs = new ByteString(byteArray); - optiqLiteral = rexBuilder.makeBinaryLiteral(bs); - break; - case SHORT: - optiqLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Short) value), optiqDataType); - break; - case INT: - optiqLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Integer) value)); - break; - case LONG: - optiqLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value)); - break; - // TODO: is Decimal an exact numeric or approximate numeric? - case DECIMAL: - if (value instanceof HiveDecimal) { - value = ((HiveDecimal) value).bigDecimalValue(); - } else if (value instanceof Decimal128) { - value = ((Decimal128) value).toBigDecimal(); - } - if (value == null) { - // We have found an invalid decimal value while enforcing precision and - // scale. Ideally, - // we would replace it with null here, which is what Hive does. However, - // we need to plumb - // this thru up somehow, because otherwise having different expression - // type in AST causes - // the plan generation to fail after CBO, probably due to some residual - // state in SA/QB. - // For now, we will not run CBO in the presence of invalid decimal - // literals. - throw new OptiqSemanticException("Expression " + literal.getExprString() - + " is not a valid decimal"); - // TODO: return createNullLiteral(literal); - } - BigDecimal bd = (BigDecimal) value; - BigInteger unscaled = bd.unscaledValue(); - if (unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0) { - optiqLiteral = rexBuilder.makeExactLiteral(bd); - } else { - // CBO doesn't support unlimited precision decimals. In practice, this - // will work... - // An alternative would be to throw CboSemanticException and fall back - // to no CBO. 
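The branch below hinges on whether the literal's unscaled value fits in a long. Reduced to a self-contained check:

  import java.math.BigDecimal;
  import java.math.BigInteger;

  public class DecimalLiteralRange {
    private static final BigInteger MIN_LONG = BigInteger.valueOf(Long.MIN_VALUE);
    private static final BigInteger MAX_LONG = BigInteger.valueOf(Long.MAX_VALUE);

    // True when the default exact-literal path applies; otherwise an explicit
    // DECIMAL RelDataType has to be constructed for the literal.
    static boolean fitsLong(BigDecimal bd) {
      BigInteger unscaled = bd.unscaledValue();
      return unscaled.compareTo(MIN_LONG) >= 0 && unscaled.compareTo(MAX_LONG) <= 0;
    }

    public static void main(String[] args) {
      System.out.println(fitsLong(new BigDecimal("123.45")));               // true
      System.out.println(fitsLong(new BigDecimal("98765432109876543210"))); // false
    }
  }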
-      // precision is the total digit count of the unscaled value
-      RelDataType relType = cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL,
-          unscaled.toString().length(), bd.scale());
-      optiqLiteral = rexBuilder.makeExactLiteral(bd, relType);
-      }
-      break;
-    case FLOAT:
-      optiqLiteral = rexBuilder.makeApproxLiteral(new BigDecimal((Float) value), optiqDataType);
-      break;
-    case DOUBLE:
-      optiqLiteral = rexBuilder.makeApproxLiteral(new BigDecimal((Double) value), optiqDataType);
-      break;
-    case CHAR:
-      if (value instanceof HiveChar)
-        value = ((HiveChar) value).getValue();
-      optiqLiteral = rexBuilder.makeLiteral((String) value);
-      break;
-    case VARCHAR:
-      if (value instanceof HiveVarchar)
-        value = ((HiveVarchar) value).getValue();
-      optiqLiteral = rexBuilder.makeLiteral((String) value);
-      break;
-    case STRING:
-      optiqLiteral = rexBuilder.makeLiteral((String) value);
-      break;
-    case DATE:
-      Calendar cal = new GregorianCalendar();
-      cal.setTime((Date) value);
-      optiqLiteral = rexBuilder.makeDateLiteral(cal);
-      break;
-    case TIMESTAMP:
-      Calendar c = null;
-      if (value instanceof Calendar) {
-        c = (Calendar) value;
-      } else {
-        c = Calendar.getInstance();
-        c.setTimeInMillis(((Timestamp) value).getTime());
-      }
-      optiqLiteral = rexBuilder.makeTimestampLiteral(c, RelDataType.PRECISION_NOT_SPECIFIED);
-      break;
-    case BINARY:
-    case VOID:
-    case UNKNOWN:
-    default:
-      throw new RuntimeException("Unsupported literal");
-    }
-
-    return optiqLiteral;
-  }
-
-  private RexNode createNullLiteral(ExprNodeDesc expr) throws OptiqSemanticException {
-    return cluster.getRexBuilder().makeNullLiteral(
-        TypeConverter.convert(expr.getTypeInfo(), cluster.getTypeFactory()).getSqlTypeName());
-  }
-
-  public static RexNode convert(RelOptCluster cluster, ExprNodeDesc joinCondnExprNode,
-      List<RelNode> inputRels, LinkedHashMap<RelNode, RowResolver> relToHiveRR,
-      Map<RelNode, ImmutableMap<String, Integer>> relToHiveColNameOptiqPosMap, boolean flattenExpr)
-      throws SemanticException {
-    List<InputCtx> inputCtxLst = new ArrayList<InputCtx>();
-
-    int offSet = 0;
-    for (RelNode r : inputRels) {
-      inputCtxLst.add(new InputCtx(r.getRowType(), relToHiveColNameOptiqPosMap.get(r), relToHiveRR
-          .get(r), offSet));
-      offSet += r.getRowType().getFieldCount();
-    }
-
-    return (new RexNodeConverter(cluster, inputCtxLst, flattenExpr)).convert(joinCondnExprNode);
-  }
-}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java
deleted file mode 100644
index 11242e2..0000000
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java
+++ /dev/null
@@ -1,418 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.translator; - -import java.lang.annotation.Annotation; -import java.util.List; -import java.util.Map; - -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; -import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.ParseDriver; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.udf.SettableUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNegative; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPositive; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.eigenbase.reltype.RelDataType; -import org.eigenbase.reltype.RelDataTypeFactory; -import org.eigenbase.sql.SqlAggFunction; -import org.eigenbase.sql.SqlFunction; -import org.eigenbase.sql.SqlFunctionCategory; -import org.eigenbase.sql.SqlKind; -import org.eigenbase.sql.SqlOperator; -import org.eigenbase.sql.fun.SqlStdOperatorTable; -import org.eigenbase.sql.type.InferTypes; -import org.eigenbase.sql.type.OperandTypes; -import org.eigenbase.sql.type.ReturnTypes; -import org.eigenbase.sql.type.SqlOperandTypeChecker; -import org.eigenbase.sql.type.SqlOperandTypeInference; -import org.eigenbase.sql.type.SqlReturnTypeInference; -import org.eigenbase.sql.type.SqlTypeFamily; -import org.eigenbase.util.Util; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Maps; - -public class SqlFunctionConverter { - private static final Log LOG = LogFactory.getLog(SqlFunctionConverter.class); - - static final Map hiveToOptiq; - static final Map optiqToHiveToken; - static final Map reverseOperatorMap; - - static { - StaticBlockBuilder builder = new StaticBlockBuilder(); - hiveToOptiq = ImmutableMap.copyOf(builder.hiveToOptiq); - optiqToHiveToken = ImmutableMap.copyOf(builder.optiqToHiveToken); - reverseOperatorMap = ImmutableMap.copyOf(builder.reverseOperatorMap); - } - - public static SqlOperator getOptiqOperator(String funcTextName, GenericUDF hiveUDF, - ImmutableList optiqArgTypes, RelDataType retType) throws OptiqSemanticException { - // handle overloaded methods first - if (hiveUDF instanceof GenericUDFOPNegative) { - return SqlStdOperatorTable.UNARY_MINUS; - } else if (hiveUDF instanceof GenericUDFOPPositive) { - return SqlStdOperatorTable.UNARY_PLUS; - } // do generic lookup - String name = null; - if (StringUtils.isEmpty(funcTextName)) { - name = getName(hiveUDF); // this should probably never happen, see getName - // comment - LOG.warn("The function text was empty, name from annotation is " + name); - } else { - // We could just do toLowerCase here and let SA qualify it, but let's be - // proper... 
- name = FunctionRegistry.getNormalizedFunctionName(funcTextName); - } - return getOptiqFn(name, optiqArgTypes, retType); - } - - public static GenericUDF getHiveUDF(SqlOperator op, RelDataType dt, int argsLength) { - String name = reverseOperatorMap.get(op); - if (name == null) { - name = op.getName(); - } - // Make sure we handle unary + and - correctly. - if (argsLength == 1) { - if (name == "+") { - name = FunctionRegistry.UNARY_PLUS_FUNC_NAME; - } else if (name == "-") { - name = FunctionRegistry.UNARY_MINUS_FUNC_NAME; - } - } - FunctionInfo hFn; - try { - hFn = name != null ? FunctionRegistry.getFunctionInfo(name) : null; - } catch (SemanticException e) { - LOG.warn("Failed to load udf " + name, e); - hFn = null; - } - if (hFn == null) { - try { - hFn = handleExplicitCast(op, dt); - } catch (SemanticException e) { - LOG.warn("Failed to load udf " + name, e); - hFn = null; - } - } - return hFn == null ? null : hFn.getGenericUDF(); - } - - private static FunctionInfo handleExplicitCast(SqlOperator op, RelDataType dt) throws SemanticException { - FunctionInfo castUDF = null; - - if (op.kind == SqlKind.CAST) { - TypeInfo castType = TypeConverter.convert(dt); - - if (castType.equals(TypeInfoFactory.byteTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("tinyint"); - } else if (castType instanceof CharTypeInfo) { - castUDF = handleCastForParameterizedType(castType, FunctionRegistry.getFunctionInfo("char")); - } else if (castType instanceof VarcharTypeInfo) { - castUDF = handleCastForParameterizedType(castType, - FunctionRegistry.getFunctionInfo("varchar")); - } else if (castType.equals(TypeInfoFactory.stringTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("string"); - } else if (castType.equals(TypeInfoFactory.booleanTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("boolean"); - } else if (castType.equals(TypeInfoFactory.shortTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("smallint"); - } else if (castType.equals(TypeInfoFactory.intTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("int"); - } else if (castType.equals(TypeInfoFactory.longTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("bigint"); - } else if (castType.equals(TypeInfoFactory.floatTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("float"); - } else if (castType.equals(TypeInfoFactory.doubleTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("double"); - } else if (castType.equals(TypeInfoFactory.timestampTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("timestamp"); - } else if (castType.equals(TypeInfoFactory.dateTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("date"); - } else if (castType instanceof DecimalTypeInfo) { - castUDF = handleCastForParameterizedType(castType, - FunctionRegistry.getFunctionInfo("decimal")); - } else if (castType.equals(TypeInfoFactory.binaryTypeInfo)) { - castUDF = FunctionRegistry.getFunctionInfo("binary"); - } else - throw new IllegalStateException("Unexpected type : " + castType.getQualifiedName()); - } - - return castUDF; - } - - private static FunctionInfo handleCastForParameterizedType(TypeInfo ti, FunctionInfo fi) { - SettableUDF udf = (SettableUDF) fi.getGenericUDF(); - try { - udf.setTypeInfo(ti); - } catch (UDFArgumentException e) { - throw new RuntimeException(e); - } - return new FunctionInfo(fi.isNative(), fi.getDisplayName(), (GenericUDF) udf); - } - - // TODO: 1) handle Agg Func Name translation 2) is it correct to add func args - // as child of func? 
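The explicit-cast reverse mapping above is a dispatch from the Calcite SQL type to a Hive cast function name; only char, varchar and decimal need extra work, because the target length or precision must also be pushed into the UDF through SettableUDF.setTypeInfo. A reduced sketch of just the dispatch (the string keys and method name are illustrative, not part of the patch):

  static String hiveCastFunctionFor(String calciteTypeName) {
    // Mirrors handleExplicitCast: Calcite SQL type -> Hive cast function name.
    if ("TINYINT".equals(calciteTypeName))  { return "tinyint"; }
    if ("SMALLINT".equals(calciteTypeName)) { return "smallint"; }
    if ("INTEGER".equals(calciteTypeName))  { return "int"; }
    if ("BIGINT".equals(calciteTypeName))   { return "bigint"; }
    if ("VARCHAR".equals(calciteTypeName))  { return "varchar"; } // parameterized: also set length
    if ("DECIMAL".equals(calciteTypeName))  { return "decimal"; } // parameterized: also set precision/scale
    throw new IllegalStateException("Unexpected type : " + calciteTypeName);
  }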
-  public static ASTNode buildAST(SqlOperator op, List<ASTNode> children) {
-    HiveToken hToken = optiqToHiveToken.get(op);
-    ASTNode node;
-    if (hToken != null) {
-      node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text);
-    } else {
-      node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
-      if (op.kind != SqlKind.CAST) {
-        if (op.kind == SqlKind.MINUS_PREFIX) {
-          node = (ASTNode) ParseDriver.adaptor.create(HiveParser.MINUS, "MINUS");
-        } else if (op.kind == SqlKind.PLUS_PREFIX) {
-          node = (ASTNode) ParseDriver.adaptor.create(HiveParser.PLUS, "PLUS");
-        } else {
-          if (op.getName().toUpperCase().equals(SqlStdOperatorTable.COUNT.getName())
-              && children.size() == 0) {
-            node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONSTAR,
-                "TOK_FUNCTIONSTAR");
-          }
-          node.addChild((ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, op.getName()));
-        }
-      }
-    }
-
-    for (ASTNode c : children) {
-      ParseDriver.adaptor.addChild(node, c);
-    }
-    return node;
-  }
-
-  /**
-   * Build the AST for flattened associative expressions ('and', 'or'). A flattened
-   * expression has the form or[x, y, z], which is originally represented as
-   * "or[x, or[y, z]]".
-   */
-  public static ASTNode buildAST(SqlOperator op, List<ASTNode> children, int i) {
-    if (i + 1 < children.size()) {
-      HiveToken hToken = optiqToHiveToken.get(op);
-      ASTNode curNode = ((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text));
-      ParseDriver.adaptor.addChild(curNode, children.get(i));
-      ParseDriver.adaptor.addChild(curNode, buildAST(op, children, i + 1));
-      return curNode;
-    } else {
-      return children.get(i);
-    }
-  }
-
-  // TODO: this is not valid. Function names for built-in UDFs are specified in
-  // FunctionRegistry, and only happen to match annotations. For user UDFs, the
-  // name is whatever the user specified at creation time (the annotation can be
-  // absent, different, or duplicate some other function).
-  private static String getName(GenericUDF hiveUDF) {
-    String udfName = null;
-    if (hiveUDF instanceof GenericUDFBridge) {
-      udfName = ((GenericUDFBridge) hiveUDF).getUdfName();
-    } else {
-      Class<? extends GenericUDF> udfClass = hiveUDF.getClass();
-      Annotation udfAnnotation = udfClass.getAnnotation(Description.class);
-
-      if (udfAnnotation != null && udfAnnotation instanceof Description) {
-        Description udfDescription = (Description) udfAnnotation;
-        udfName = udfDescription.name();
-        if (udfName != null) {
-          String[] aliases = udfName.split(",");
-          if (aliases.length > 0)
-            udfName = aliases[0];
-        }
-      }
-
-      if (udfName == null || udfName.isEmpty()) {
-        udfName = hiveUDF.getClass().getName();
-        int indx = udfName.lastIndexOf(".");
-        if (indx >= 0) {
-          indx += 1;
-          udfName = udfName.substring(indx);
-        }
-      }
-    }
-
-    return udfName;
-  }
-
-  /** This class is used to build immutable hashmaps in the static block above.
*/ - private static class StaticBlockBuilder { - final Map hiveToOptiq = Maps.newHashMap(); - final Map optiqToHiveToken = Maps.newHashMap(); - final Map reverseOperatorMap = Maps.newHashMap(); - - StaticBlockBuilder() { - registerFunction("+", SqlStdOperatorTable.PLUS, hToken(HiveParser.PLUS, "+")); - registerFunction("-", SqlStdOperatorTable.MINUS, hToken(HiveParser.MINUS, "-")); - registerFunction("*", SqlStdOperatorTable.MULTIPLY, hToken(HiveParser.STAR, "*")); - registerFunction("/", SqlStdOperatorTable.DIVIDE, hToken(HiveParser.STAR, "/")); - registerFunction("%", SqlStdOperatorTable.MOD, hToken(HiveParser.STAR, "%")); - registerFunction("and", SqlStdOperatorTable.AND, hToken(HiveParser.KW_AND, "and")); - registerFunction("or", SqlStdOperatorTable.OR, hToken(HiveParser.KW_OR, "or")); - registerFunction("=", SqlStdOperatorTable.EQUALS, hToken(HiveParser.EQUAL, "=")); - registerFunction("<", SqlStdOperatorTable.LESS_THAN, hToken(HiveParser.LESSTHAN, "<")); - registerFunction("<=", SqlStdOperatorTable.LESS_THAN_OR_EQUAL, - hToken(HiveParser.LESSTHANOREQUALTO, "<=")); - registerFunction(">", SqlStdOperatorTable.GREATER_THAN, hToken(HiveParser.GREATERTHAN, ">")); - registerFunction(">=", SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, - hToken(HiveParser.GREATERTHANOREQUALTO, ">=")); - registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not")); - registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>")); - } - - private void registerFunction(String name, SqlOperator optiqFn, HiveToken hiveToken) { - reverseOperatorMap.put(optiqFn, name); - FunctionInfo hFn; - try { - hFn = FunctionRegistry.getFunctionInfo(name); - } catch (SemanticException e) { - LOG.warn("Failed to load udf " + name, e); - hFn = null; - } - if (hFn != null) { - String hFnName = getName(hFn.getGenericUDF()); - hiveToOptiq.put(hFnName, optiqFn); - - if (hiveToken != null) { - optiqToHiveToken.put(optiqFn, hiveToken); - } - } - } - } - - private static HiveToken hToken(int type, String text) { - return new HiveToken(type, text); - } - - public static class OptiqUDAF extends SqlAggFunction { - final ImmutableList argTypes; - final RelDataType retType; - - public OptiqUDAF(String opName, SqlReturnTypeInference returnTypeInference, - SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker, - ImmutableList argTypes, RelDataType retType) { - super(opName, SqlKind.OTHER_FUNCTION, returnTypeInference, operandTypeInference, - operandTypeChecker, SqlFunctionCategory.USER_DEFINED_FUNCTION); - this.argTypes = argTypes; - this.retType = retType; - } - - @Override - public List getParameterTypes(final RelDataTypeFactory typeFactory) { - return this.argTypes; - } - - @Override - public RelDataType getReturnType(final RelDataTypeFactory typeFactory) { - return this.retType; - } - } - - private static class OptiqUDFInfo { - private String udfName; - private SqlReturnTypeInference returnTypeInference; - private SqlOperandTypeInference operandTypeInference; - private SqlOperandTypeChecker operandTypeChecker; - private ImmutableList argTypes; - private RelDataType retType; - } - - private static OptiqUDFInfo getUDFInfo(String hiveUdfName, - ImmutableList optiqArgTypes, RelDataType optiqRetType) { - OptiqUDFInfo udfInfo = new OptiqUDFInfo(); - udfInfo.udfName = hiveUdfName; - udfInfo.returnTypeInference = ReturnTypes.explicit(optiqRetType); - udfInfo.operandTypeInference = InferTypes.explicit(optiqArgTypes); - ImmutableList.Builder typeFamilyBuilder = new 
ImmutableList.Builder(); - for (RelDataType at : optiqArgTypes) { - typeFamilyBuilder.add(Util.first(at.getSqlTypeName().getFamily(), SqlTypeFamily.ANY)); - } - udfInfo.operandTypeChecker = OperandTypes.family(typeFamilyBuilder.build()); - - udfInfo.argTypes = ImmutableList. copyOf(optiqArgTypes); - udfInfo.retType = optiqRetType; - - return udfInfo; - } - - public static SqlOperator getOptiqFn(String hiveUdfName, - ImmutableList optiqArgTypes, RelDataType optiqRetType) - throws OptiqSemanticException { - - if (hiveUdfName != null && hiveUdfName.trim().equals("<=>")) { - // We can create Optiq IS_DISTINCT_FROM operator for this. But since our - // join reordering algo cant handle this anyway there is no advantage of - // this. - // So, bail out for now. - throw new OptiqSemanticException("<=> is not yet supported for cbo."); - } - SqlOperator optiqOp = hiveToOptiq.get(hiveUdfName); - if (optiqOp == null) { - OptiqUDFInfo uInf = getUDFInfo(hiveUdfName, optiqArgTypes, optiqRetType); - optiqOp = new SqlFunction(uInf.udfName, SqlKind.OTHER_FUNCTION, uInf.returnTypeInference, - uInf.operandTypeInference, uInf.operandTypeChecker, - SqlFunctionCategory.USER_DEFINED_FUNCTION); - } - - return optiqOp; - } - - public static SqlAggFunction getOptiqAggFn(String hiveUdfName, - ImmutableList optiqArgTypes, RelDataType optiqRetType) { - SqlAggFunction optiqAggFn = (SqlAggFunction) hiveToOptiq.get(hiveUdfName); - if (optiqAggFn == null) { - OptiqUDFInfo uInf = getUDFInfo(hiveUdfName, optiqArgTypes, optiqRetType); - - optiqAggFn = new OptiqUDAF(uInf.udfName, uInf.returnTypeInference, uInf.operandTypeInference, - uInf.operandTypeChecker, uInf.argTypes, uInf.retType); - } - - return optiqAggFn; - } - - static class HiveToken { - int type; - String text; - String[] args; - - HiveToken(int type, String text, String... args) { - this.type = type; - this.text = text; - this.args = args; - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java deleted file mode 100644 index 68f3be7..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java +++ /dev/null @@ -1,327 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.optiq.translator; - -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.RowSchema; -import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException; -import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter.HiveToken; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.reltype.RelDataType; -import org.eigenbase.reltype.RelDataTypeFactory; -import org.eigenbase.reltype.RelDataTypeField; -import org.eigenbase.rex.RexBuilder; -import org.eigenbase.sql.type.SqlTypeName; - -import com.google.common.base.Function; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableMap.Builder; -import com.google.common.collect.Lists; - -public class TypeConverter { - private static final Map optiqToHiveTypeNameMap; - - // TODO: Handling of char[], varchar[], string... - static { - Builder b = ImmutableMap. 
builder(); - b.put(SqlTypeName.BOOLEAN.getName(), new HiveToken(HiveParser.TOK_BOOLEAN, "TOK_BOOLEAN")); - b.put(SqlTypeName.TINYINT.getName(), new HiveToken(HiveParser.TOK_TINYINT, "TOK_TINYINT")); - b.put(SqlTypeName.SMALLINT.getName(), new HiveToken(HiveParser.TOK_SMALLINT, "TOK_SMALLINT")); - b.put(SqlTypeName.INTEGER.getName(), new HiveToken(HiveParser.TOK_INT, "TOK_INT")); - b.put(SqlTypeName.BIGINT.getName(), new HiveToken(HiveParser.TOK_BIGINT, "TOK_BIGINT")); - b.put(SqlTypeName.FLOAT.getName(), new HiveToken(HiveParser.TOK_FLOAT, "TOK_FLOAT")); - b.put(SqlTypeName.DOUBLE.getName(), new HiveToken(HiveParser.TOK_DOUBLE, "TOK_DOUBLE")); - b.put(SqlTypeName.DATE.getName(), new HiveToken(HiveParser.TOK_DATE, "TOK_DATE")); - b.put(SqlTypeName.TIMESTAMP.getName(), new HiveToken(HiveParser.TOK_TIMESTAMP, "TOK_TIMESTAMP")); - b.put(SqlTypeName.BINARY.getName(), new HiveToken(HiveParser.TOK_BINARY, "TOK_BINARY")); - optiqToHiveTypeNameMap = b.build(); - }; - - /*********************** Convert Hive Types To Optiq Types ***********************/ - public static RelDataType getType(RelOptCluster cluster, - List cInfoLst) throws OptiqSemanticException { - RexBuilder rexBuilder = cluster.getRexBuilder(); - RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory(); - List fieldTypes = new LinkedList(); - List fieldNames = new LinkedList(); - - for (ColumnInfo ci : cInfoLst) { - fieldTypes.add(convert(ci.getType(), dtFactory)); - fieldNames.add(ci.getInternalName()); - } - return dtFactory.createStructType(fieldTypes, fieldNames); - } - - public static RelDataType getType(RelOptCluster cluster, RowResolver rr, - List neededCols) throws OptiqSemanticException { - RexBuilder rexBuilder = cluster.getRexBuilder(); - RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory(); - RowSchema rs = rr.getRowSchema(); - List fieldTypes = new LinkedList(); - List fieldNames = new LinkedList(); - - for (ColumnInfo ci : rs.getSignature()) { - if (neededCols == null || neededCols.contains(ci.getInternalName())) { - fieldTypes.add(convert(ci.getType(), dtFactory)); - fieldNames.add(ci.getInternalName()); - } - } - return dtFactory.createStructType(fieldTypes, fieldNames); - } - - public static RelDataType convert(TypeInfo type, RelDataTypeFactory dtFactory) - throws OptiqSemanticException{ - RelDataType convertedType = null; - - switch (type.getCategory()) { - case PRIMITIVE: - convertedType = convert((PrimitiveTypeInfo) type, dtFactory); - break; - case LIST: - convertedType = convert((ListTypeInfo) type, dtFactory); - break; - case MAP: - convertedType = convert((MapTypeInfo) type, dtFactory); - break; - case STRUCT: - convertedType = convert((StructTypeInfo) type, dtFactory); - break; - case UNION: - convertedType = convert((UnionTypeInfo) type, dtFactory); - break; - } - return convertedType; - } - - public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) { - RelDataType convertedType = null; - - switch (type.getPrimitiveCategory()) { - case VOID: - convertedType = dtFactory.createSqlType(SqlTypeName.NULL); - break; - case BOOLEAN: - convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN); - break; - case BYTE: - convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT); - break; - case SHORT: - convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT); - break; - case INT: - convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER); - break; - case LONG: - convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT); - break; - case FLOAT: - convertedType = 
dtFactory.createSqlType(SqlTypeName.FLOAT); - break; - case DOUBLE: - convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE); - break; - case STRING: - convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE); - break; - case DATE: - convertedType = dtFactory.createSqlType(SqlTypeName.DATE); - break; - case TIMESTAMP: - convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP); - break; - case BINARY: - convertedType = dtFactory.createSqlType(SqlTypeName.BINARY); - break; - case DECIMAL: - DecimalTypeInfo dtInf = (DecimalTypeInfo) type; - convertedType = dtFactory - .createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale()); - break; - case VARCHAR: - convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, - ((BaseCharTypeInfo) type).getLength()); - break; - case CHAR: - convertedType = dtFactory.createSqlType(SqlTypeName.CHAR, - ((BaseCharTypeInfo) type).getLength()); - break; - case UNKNOWN: - convertedType = dtFactory.createSqlType(SqlTypeName.OTHER); - break; - } - - if (null == convertedType) { - throw new RuntimeException("Unsupported Type : " + type.getTypeName()); - } - - return dtFactory.createTypeWithNullability(convertedType, true); - } - - public static RelDataType convert(ListTypeInfo lstType, - RelDataTypeFactory dtFactory) throws OptiqSemanticException { - RelDataType elemType = convert(lstType.getListElementTypeInfo(), dtFactory); - return dtFactory.createArrayType(elemType, -1); - } - - public static RelDataType convert(MapTypeInfo mapType, RelDataTypeFactory dtFactory) - throws OptiqSemanticException { - RelDataType keyType = convert(mapType.getMapKeyTypeInfo(), dtFactory); - RelDataType valueType = convert(mapType.getMapValueTypeInfo(), dtFactory); - return dtFactory.createMapType(keyType, valueType); - } - - public static RelDataType convert(StructTypeInfo structType, - final RelDataTypeFactory dtFactory) throws OptiqSemanticException { - List fTypes = new ArrayList(structType.getAllStructFieldTypeInfos().size()); - for (TypeInfo ti : structType.getAllStructFieldTypeInfos()) { - fTypes.add(convert(ti,dtFactory)); - } - return dtFactory.createStructType(fTypes, structType.getAllStructFieldNames()); - } - - public static RelDataType convert(UnionTypeInfo unionType, RelDataTypeFactory dtFactory) - throws OptiqSemanticException{ - // Union type is not supported in Optiq. 
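One detail worth noting in convert(PrimitiveTypeInfo, ...) above: every result is wrapped with createTypeWithNullability(..., true), because Hive columns are nullable by default while Calcite types are NOT NULL unless stated otherwise. A standalone sketch; using SqlTypeFactoryImpl directly is an assumption for illustration, the patch itself always goes through the cluster's type factory:

  import org.apache.calcite.rel.type.RelDataType;
  import org.apache.calcite.rel.type.RelDataTypeFactory;
  import org.apache.calcite.rel.type.RelDataTypeSystem;
  import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
  import org.apache.calcite.sql.type.SqlTypeName;

  public class NullableTypeDemo {
    public static void main(String[] args) {
      RelDataTypeFactory factory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
      RelDataType intType = factory.createSqlType(SqlTypeName.INTEGER);  // NOT NULL by default
      RelDataType hiveInt = factory.createTypeWithNullability(intType, true);
      System.out.println(hiveInt.isNullable());                          // true
    }
  }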
- throw new OptiqSemanticException("Union type is not supported"); - } - - public static TypeInfo convert(RelDataType rType) { - if (rType.isStruct()) { - return convertStructType(rType); - } else if (rType.getComponentType() != null) { - return convertListType(rType); - } else if (rType.getKeyType() != null) { - return convertMapType(rType); - } else { - return convertPrimtiveType(rType); - } - } - - public static TypeInfo convertStructType(RelDataType rType) { - List fTypes = Lists.transform(rType.getFieldList(), - new Function() { - @Override - public TypeInfo apply(RelDataTypeField f) { - return convert(f.getType()); - } - }); - List fNames = Lists.transform(rType.getFieldList(), - new Function() { - @Override - public String apply(RelDataTypeField f) { - return f.getName(); - } - }); - return TypeInfoFactory.getStructTypeInfo(fNames, fTypes); - } - - public static TypeInfo convertMapType(RelDataType rType) { - return TypeInfoFactory.getMapTypeInfo(convert(rType.getKeyType()), - convert(rType.getValueType())); - } - - public static TypeInfo convertListType(RelDataType rType) { - return TypeInfoFactory.getListTypeInfo(convert(rType.getComponentType())); - } - - public static TypeInfo convertPrimtiveType(RelDataType rType) { - switch (rType.getSqlTypeName()) { - case BOOLEAN: - return TypeInfoFactory.booleanTypeInfo; - case TINYINT: - return TypeInfoFactory.byteTypeInfo; - case SMALLINT: - return TypeInfoFactory.shortTypeInfo; - case INTEGER: - return TypeInfoFactory.intTypeInfo; - case BIGINT: - return TypeInfoFactory.longTypeInfo; - case FLOAT: - return TypeInfoFactory.floatTypeInfo; - case DOUBLE: - return TypeInfoFactory.doubleTypeInfo; - case DATE: - return TypeInfoFactory.dateTypeInfo; - case TIMESTAMP: - return TypeInfoFactory.timestampTypeInfo; - case BINARY: - return TypeInfoFactory.binaryTypeInfo; - case DECIMAL: - return TypeInfoFactory.getDecimalTypeInfo(rType.getPrecision(), rType.getScale()); - case VARCHAR: - if (rType.getPrecision() == Integer.MAX_VALUE) - return TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); - else - return TypeInfoFactory.getVarcharTypeInfo(rType.getPrecision()); - case CHAR: - return TypeInfoFactory.getCharTypeInfo(rType.getPrecision()); - case OTHER: - default: - return TypeInfoFactory.voidTypeInfo; - } - - } - - /*********************** Convert Optiq Types To Hive Types ***********************/ - public static HiveToken hiveToken(RelDataType optiqType) { - HiveToken ht = null; - - switch (optiqType.getSqlTypeName()) { - case CHAR: { - ht = new HiveToken(HiveParser.TOK_CHAR, "TOK_CHAR", String.valueOf(optiqType.getPrecision())); - } - break; - case VARCHAR: { - if (optiqType.getPrecision() == Integer.MAX_VALUE) - ht = new HiveToken(HiveParser.TOK_STRING, "TOK_STRING", String.valueOf(optiqType - .getPrecision())); - else - ht = new HiveToken(HiveParser.TOK_VARCHAR, "TOK_VARCHAR", String.valueOf(optiqType - .getPrecision())); - } - break; - case DECIMAL: { - ht = new HiveToken(HiveParser.TOK_DECIMAL, "TOK_DECIMAL", String.valueOf(optiqType - .getPrecision()), String.valueOf(optiqType.getScale())); - } - break; - default: - ht = optiqToHiveTypeNameMap.get(optiqType.getSqlTypeName().getName()); - } - - return ht; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1b60cbb..7aede8c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ 
-20,11 +20,28 @@ import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS; -import com.google.common.base.Function; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableList.Builder; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.io.Serializable; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.UndeclaredThrowableException; +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeSet; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.Token; @@ -33,6 +50,67 @@ import org.antlr.runtime.tree.TreeVisitorAction; import org.antlr.runtime.tree.TreeWizard; import org.antlr.runtime.tree.TreeWizard.ContextVisitor; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelOptQuery; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.plan.hep.HepMatchOrder; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.rel.InvalidRelException; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationImpl; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; +import org.apache.calcite.rel.rules.FilterMergeRule; +import org.apache.calcite.rel.rules.FilterProjectTransposeRule; +import org.apache.calcite.rel.rules.FilterSetOpTransposeRule; +import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule; +import org.apache.calcite.rel.rules.JoinToMultiJoinRule; +import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; +import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; +import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; +import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexFieldCollation; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import 
org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexWindowBound; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlExplainLevel; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlWindow; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql2rel.RelFieldTrimmer; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.util.CompositeList; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; +import org.apache.calcite.util.Pair; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -100,29 +178,29 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.Optimizer; -import org.apache.hadoop.hive.ql.optimizer.optiq.HiveDefaultRelMetadataProvider; -import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.HiveTypeSystemImpl; -import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException; -import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable; -import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveVolcanoPlanner; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveUnionRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePartitionPrunerRule; -import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushFilterPastJoinRule; -import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ASTConverter; -import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinCondTypeCheckProcFactory; -import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinTypeCheckCtx; -import org.apache.hadoop.hive.ql.optimizer.optiq.translator.RexNodeConverter; -import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter; -import org.apache.hadoop.hive.ql.optimizer.optiq.translator.TypeConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; 
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinTypeCheckCtx; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; @@ -216,92 +294,12 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; -import org.eigenbase.rel.AggregateCall; -import org.eigenbase.rel.AggregateRelBase; -import org.eigenbase.rel.Aggregation; -import org.eigenbase.rel.FilterRelBase; -import org.eigenbase.rel.InvalidRelException; -import org.eigenbase.rel.JoinRelBase; -import org.eigenbase.rel.JoinRelType; -import org.eigenbase.rel.RelCollation; -import org.eigenbase.rel.RelCollationImpl; -import org.eigenbase.rel.RelFactories; -import org.eigenbase.rel.RelFieldCollation; -import org.eigenbase.rel.RelNode; -import org.eigenbase.rel.metadata.CachingRelMetadataProvider; -import org.eigenbase.rel.metadata.ChainedRelMetadataProvider; -import org.eigenbase.rel.metadata.RelMetadataProvider; -import org.eigenbase.rel.rules.ConvertMultiJoinRule; -import org.eigenbase.rel.rules.FilterAggregateTransposeRule; -import org.eigenbase.rel.rules.LoptOptimizeJoinRule; -import org.eigenbase.rel.rules.MergeFilterRule; -import org.eigenbase.rel.rules.PushFilterPastProjectRule; -import org.eigenbase.rel.rules.PushFilterPastSetOpRule; -import org.eigenbase.rel.rules.PushSemiJoinPastFilterRule; -import org.eigenbase.rel.rules.PushSemiJoinPastJoinRule; -import org.eigenbase.rel.rules.PushSemiJoinPastProjectRule; -import org.eigenbase.rel.rules.SemiJoinRel; -import org.eigenbase.rel.rules.TransitivePredicatesOnJoinRule; -import org.eigenbase.relopt.RelOptCluster; -import org.eigenbase.relopt.RelOptPlanner; -import org.eigenbase.relopt.RelOptQuery; -import org.eigenbase.relopt.RelOptRule; -import org.eigenbase.relopt.RelOptSchema; -import org.eigenbase.relopt.RelOptUtil; -import org.eigenbase.relopt.RelTraitSet; -import org.eigenbase.relopt.hep.HepMatchOrder; -import org.eigenbase.relopt.hep.HepPlanner; -import org.eigenbase.relopt.hep.HepProgram; -import org.eigenbase.relopt.hep.HepProgramBuilder; -import org.eigenbase.reltype.RelDataType; -import org.eigenbase.reltype.RelDataTypeFactory; -import org.eigenbase.reltype.RelDataTypeField; -import org.eigenbase.rex.RexBuilder; -import org.eigenbase.rex.RexFieldCollation; -import org.eigenbase.rex.RexInputRef; -import 
org.eigenbase.rex.RexNode; -import org.eigenbase.rex.RexUtil; -import org.eigenbase.rex.RexWindowBound; -import org.eigenbase.sql.SqlAggFunction; -import org.eigenbase.sql.SqlCall; -import org.eigenbase.sql.SqlExplainLevel; -import org.eigenbase.sql.SqlKind; -import org.eigenbase.sql.SqlLiteral; -import org.eigenbase.sql.SqlNode; -import org.eigenbase.sql.SqlWindow; -import org.eigenbase.sql.parser.SqlParserPos; -import org.eigenbase.sql.type.SqlTypeName; -import org.eigenbase.sql2rel.RelFieldTrimmer; -import org.eigenbase.util.CompositeList; -import org.eigenbase.util.ImmutableIntList; -import org.eigenbase.util.Pair; -import java.io.IOException; -import java.io.Serializable; -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.UndeclaredThrowableException; -import java.math.BigDecimal; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.BitSet; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.TreeSet; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; - -import net.hydromatic.optiq.SchemaPlus; -import net.hydromatic.optiq.tools.Frameworks; +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; /** * Implementation of the semantic analyzer. It generates the query plan. @@ -2864,7 +2862,7 @@ private Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, } if (ensureUniqueCols) { if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) { - throw new OptiqSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1] + throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1] + " => " + oColInfo + " due to duplication, see previous warnings"); } } else { @@ -10055,12 +10053,12 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { if (runCBO) { disableJoinMerge = true; - OptiqBasedPlanner optiqPlanner = new OptiqBasedPlanner(); + CalciteBasedPlanner calcitePlanner = new CalciteBasedPlanner(); boolean reAnalyzeAST = false; try { // 1. Gen Optimized AST - ASTNode newAST = optiqPlanner.getOptimizedAST(prunedPartitions); + ASTNode newAST = calcitePlanner.getOptimizedAST(prunedPartitions); // 1.1. Fix up the query for insert/ctas newAST = fixUpCtasAndInsertAfterCbo(ast, newAST, cboCtx); @@ -10095,14 +10093,14 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { * .getRowResolver(), true); */ } catch (Exception e) { - boolean isMissingStats = optiqPlanner.noColsMissingStats.get() > 0; + boolean isMissingStats = calcitePlanner.noColsMissingStats.get() > 0; if (isMissingStats) { LOG.error("CBO failed due to missing column stats (see previous errors), skipping CBO"); } else { LOG.error("CBO failed, skipping CBO. 
", e); } if (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) || isMissingStats - || e instanceof OptiqSemanticException) { + || e instanceof CalciteSemanticException) { reAnalyzeAST = true; } else if (e instanceof SemanticException) { throw (SemanticException)e; @@ -10266,7 +10264,7 @@ private boolean canHandleAstForCbo(ASTNode ast, QB qb, PreCboCtx cboCtx) { // be supported and would require additional checks similar to IsQuery? boolean isSupportedType = qb.getIsQuery() || qb.isCTAS() || cboCtx.type == PreCboCtx.Type.INSERT; - boolean noBadTokens = HiveOptiqUtil.validateASTForUnsupportedTokens(ast); + boolean noBadTokens = HiveCalciteUtil.validateASTForUnsupportedTokens(ast); boolean result = isSupportedRoot && isSupportedType && createVwDesc == null && noBadTokens; if (!result) { if (needToLogMessage) { @@ -12489,10 +12487,10 @@ protected boolean deleting() { return false; } - /**** Temporary Place Holder For Optiq plan Gen, Optimizer ****/ + /**** Temporary Place Holder For Calcite plan Gen, Optimizer ****/ /** - * Entry point to Optimizations using Optiq. Checks whether Optiq can handle the query. + * Entry point to Optimizations using Calcite. Checks whether Calcite can handle the query. * @param qbToChk Query block to check. * @param verbose Whether return value should be verbose in case of failure. * @return null if the query can be handled; non-null reason string if it cannot be. @@ -12530,35 +12528,35 @@ private String canHandleQbForCbo(QB qbToChk, boolean topLevelQB, boolean verbose return msg; } - private class OptiqBasedPlanner implements Frameworks.PlannerAction { + private class CalciteBasedPlanner implements Frameworks.PlannerAction { private RelOptCluster cluster; private RelOptSchema relOptSchema; private SemanticException semanticException; private Map partitionCache; - private final AtomicInteger noColsMissingStats = new AtomicInteger(0); + private final AtomicInteger noColsMissingStats = new AtomicInteger(0); List topLevelFieldSchema; // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or // just last one. 
LinkedHashMap relToHiveRR = new LinkedHashMap(); - LinkedHashMap> relToHiveColNameOptiqPosMap = new LinkedHashMap>(); + LinkedHashMap> relToHiveColNameCalcitePosMap = new LinkedHashMap>(); private ASTNode getOptimizedAST(Map partitionCache) throws SemanticException { - ASTNode optiqOptimizedAST = null; - RelNode optimizedOptiqPlan = null; + ASTNode calciteOptimizedAST = null; + RelNode optimizedCalcitePlan = null; this.partitionCache = partitionCache; try { - optimizedOptiqPlan = Frameworks.withPlanner(this, + optimizedCalcitePlan = Frameworks.withPlanner(this, Frameworks.newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build()); } catch (Exception e) { rethrowCalciteException(e); throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage()); } - optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, topLevelFieldSchema); + calciteOptimizedAST = ASTConverter.convert(optimizedCalcitePlan, topLevelFieldSchema); - return optiqOptimizedAST; + return calciteOptimizedAST; } /* @@ -12599,9 +12597,9 @@ private boolean isUselessCause(Throwable t) { @Override public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) { - RelNode optiqGenPlan = null; - RelNode optiqPreCboPlan = null; - RelNode optiqOptimizedPlan = null; + RelNode calciteGenPlan = null; + RelNode calcitePreCboPlan = null; + RelNode calciteOptimizedPlan = null; /* * recreate cluster, so that it picks up the additional traitDef @@ -12615,24 +12613,24 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu this.relOptSchema = relOptSchema; try { - optiqGenPlan = genLogicalPlan(qb, true); - topLevelFieldSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(optiqGenPlan), + calciteGenPlan = genLogicalPlan(qb, true); + topLevelFieldSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(calciteGenPlan), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); } catch (SemanticException e) { semanticException = e; throw new RuntimeException(e); } - optiqPreCboPlan = applyPreCBOTransforms(optiqGenPlan, HiveDefaultRelMetadataProvider.INSTANCE); + calcitePreCboPlan = applyPreCBOTransforms(calciteGenPlan, HiveDefaultRelMetadataProvider.INSTANCE); List list = Lists.newArrayList(); list.add(HiveDefaultRelMetadataProvider.INSTANCE); - RelTraitSet desiredTraits = cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY); + RelTraitSet desiredTraits = cluster.traitSetOf(HiveRelNode.CONVENTION, RelCollationImpl.EMPTY); HepProgram hepPgm = null; HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP) - .addRuleInstance(new ConvertMultiJoinRule(HiveJoinRel.class)); - hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoinRel.HIVE_JOIN_FACTORY, - HiveProjectRel.DEFAULT_PROJECT_FACTORY, HiveFilterRel.DEFAULT_FILTER_FACTORY)); + .addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class)); + hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY, + HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY)); hepPgm = hepPgmBldr.build(); HepPlanner hepPlanner = new HepPlanner(hepPgm); @@ -12641,25 +12639,25 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); - RelNode rootRel = optiqPreCboPlan; + RelNode rootRel = calcitePreCboPlan; 
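For readers new to Calcite's Hep driver: the program built above runs bottom-up, first collapsing adjacent joins into a MultiJoin and then letting LoptOptimizeJoinRule search for a cheaper join order. Condensed from the surrounding code, with the same rules, factories and imports the patch already uses:

  HepProgramBuilder builder = new HepProgramBuilder()
      .addMatchOrder(HepMatchOrder.BOTTOM_UP)
      .addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class));   // joins -> MultiJoin
  builder.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY,
      HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY));

  HepPlanner planner = new HepPlanner(builder.build());
  planner.setRoot(rootRel);              // rootRel: the pre-CBO plan
  RelNode best = planner.findBestExp();  // plan with the reordered joins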
hepPlanner.setRoot(rootRel); - if (!optiqPreCboPlan.getTraitSet().equals(desiredTraits)) { - rootRel = hepPlanner.changeTraits(optiqPreCboPlan, desiredTraits); + if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) { + rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits); } hepPlanner.setRoot(rootRel); - optiqOptimizedPlan = hepPlanner.findBestExp(); + calciteOptimizedPlan = hepPlanner.findBestExp(); if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) { LOG.debug("CBO Planning details:\n"); - LOG.debug("Original Plan:\n" + RelOptUtil.toString(optiqGenPlan)); + LOG.debug("Original Plan:\n" + RelOptUtil.toString(calciteGenPlan)); LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n" - + RelOptUtil.toString(optiqPreCboPlan)); + + RelOptUtil.toString(calcitePreCboPlan)); LOG.debug("Plan After Join Reordering:\n" - + RelOptUtil.toString(optiqOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES)); + + RelOptUtil.toString(calciteOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES)); } - return optiqOptimizedPlan; + return calciteOptimizedPlan; } public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { @@ -12670,37 +12668,37 @@ public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdPro // Push Down Semi Joins basePlan = hepPlan(basePlan, true, mdProvider, - PushSemiJoinPastJoinRule.INSTANCE, - new PushSemiJoinPastFilterRule(HiveFilterRel.DEFAULT_FILTER_FACTORY), - new PushSemiJoinPastProjectRule(HiveProjectRel.DEFAULT_PROJECT_FACTORY)); + SemiJoinJoinTransposeRule.INSTANCE, + SemiJoinFilterTransposeRule.INSTANCE, + SemiJoinProjectTransposeRule.INSTANCE); basePlan = hepPlan(basePlan, true, mdProvider, - new PushFilterPastProjectRule( - FilterRelBase.class, HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveProjectRel.class, - HiveProjectRel.DEFAULT_PROJECT_FACTORY), new PushFilterPastSetOpRule( - HiveFilterRel.DEFAULT_FILTER_FACTORY), new MergeFilterRule( - HiveFilterRel.DEFAULT_FILTER_FACTORY), HivePushFilterPastJoinRule.JOIN, - HivePushFilterPastJoinRule.FILTER_ON_JOIN, + new FilterProjectTransposeRule( + Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, + HiveProject.DEFAULT_PROJECT_FACTORY), new FilterSetOpTransposeRule( + HiveFilter.DEFAULT_FILTER_FACTORY), new FilterMergeRule( + HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, new FilterAggregateTransposeRule( - FilterRelBase.class, - HiveFilterRel.DEFAULT_FILTER_FACTORY, - AggregateRelBase.class)); + Filter.class, + HiveFilter.DEFAULT_FILTER_FACTORY, + Aggregate.class)); - basePlan = hepPlan(basePlan, false, mdProvider, new TransitivePredicatesOnJoinRule( - JoinRelBase.class, HiveFilterRel.DEFAULT_FILTER_FACTORY), - // TODO: Enable it after OPTIQ-407 is fixed + basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule( + Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), + // TODO: Enable it after CALCITE-407 is fixed //RemoveTrivialProjectRule.INSTANCE, - new HivePartitionPrunerRule(SemanticAnalyzer.this.conf)); + new HivePartitionPruneRule(SemanticAnalyzer.this.conf)); - RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProjectRel.DEFAULT_PROJECT_FACTORY, - HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveJoinRel.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY, - HiveSortRel.HIVE_SORT_REL_FACTORY, HiveAggregateRel.HIVE_AGGR_REL_FACTORY, HiveUnionRel.UNION_REL_FACTORY); + RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, 
+ HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY, + HiveSort.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); basePlan = hepPlan(basePlan, true, mdProvider, - new PushFilterPastProjectRule(FilterRelBase.class, - HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveProjectRel.class, - HiveProjectRel.DEFAULT_PROJECT_FACTORY)); + new FilterProjectTransposeRule(Filter.class, + HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, + HiveProject.DEFAULT_PROJECT_FACTORY)); return basePlan; } @@ -12736,7 +12734,7 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, @SuppressWarnings("nls") private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel, String rightalias, RelNode rightRel) throws SemanticException { - HiveUnionRel unionRel = null; + HiveUnion unionRel = null; // 1. Get Row Resolvers, Column map for original left and right input of // Union Rel @@ -12768,7 +12766,7 @@ private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode + " does not have the field " + field)); } if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { - throw new OptiqSemanticException(generateErrorMessage(tabref, + throw new CalciteSemanticException(generateErrorMessage(tabref, "Schema of both sides of union should match: field " + field + ":" + " appears on the left side of the UNION at column position: " + getPositionFromInternalName(lInfo.getInternalName()) @@ -12780,7 +12778,7 @@ private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType()); if (commonTypeInfo == null) { - throw new OptiqSemanticException(generateErrorMessage(tabref, + throw new CalciteSemanticException(generateErrorMessage(tabref, "Schema of both sides of union should match: Column " + field + " is of type " + lInfo.getType().getTypeName() + " on first table and type " + rInfo.getType().getTypeName() + " on second table")); @@ -12800,7 +12798,7 @@ private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode unionoutRR.put(unionalias, field, unionColInfo); } - // 4. Determine which columns requires cast on left/right input (Optiq + // 4. 
-      // 4. Determine which columns requires cast on left/right input (Optiq
+      // 4. Determine which columns requires cast on left/right input (Calcite
       // requires exact types on both sides of union)
       boolean leftNeedsTypeCast = false;
       boolean rightNeedsTypeCast = false;
@@ -12842,11 +12840,11 @@ private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode
       RelNode unionLeftInput = leftRel;
       RelNode unionRightInput = rightRel;
       if (leftNeedsTypeCast) {
-        unionLeftInput = HiveProjectRel.create(leftRel, leftProjs, leftRel.getRowType()
+        unionLeftInput = HiveProject.create(leftRel, leftProjs, leftRel.getRowType()
             .getFieldNames());
       }
       if (rightNeedsTypeCast) {
-        unionRightInput = HiveProjectRel.create(rightRel, rightProjs, rightRel.getRowType()
+        unionRightInput = HiveProject.create(rightRel, rightProjs, rightRel.getRowType()
            .getFieldNames());
       }
@@ -12854,12 +12852,12 @@ private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode
       ImmutableList.Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
       bldr.add(unionLeftInput);
       bldr.add(unionRightInput);
-      unionRel = new HiveUnionRel(cluster, TraitsUtil.getDefaultTraitSet(cluster),
+      unionRel = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster),
          bldr.build());
       relToHiveRR.put(unionRel, unionoutRR);
-      relToHiveColNameOptiqPosMap.put(unionRel,
-          this.buildHiveToOptiqColumnMap(unionoutRR, unionRel));
+      relToHiveColNameCalcitePosMap.put(unionRel,
+          this.buildHiveToCalciteColumnMap(unionoutRR, unionRel));
       return unionRel;
     }
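The leftProjs/rightProjs projections built above exist because Calcite requires identical row types on both UNION inputs, while Hive only requires a common type. A hedged sketch of that cast-insertion step; it uses only RexBuilder and row-type calls confirmed elsewhere in this patch, but castIfNeeded and its parameters are illustrative names, not the Hive code.

import java.util.ArrayList;
import java.util.List;

import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;

final class UnionCastSketch {
  /** Builds one projection expression per input column, casting to the
   *  corresponding target (common) type when the types differ. */
  static List<RexNode> castIfNeeded(RexBuilder rexBuilder, RelNode input,
      RelDataType targetRowType) {
    List<RexNode> projs = new ArrayList<RexNode>();
    List<RelDataTypeField> inFields = input.getRowType().getFieldList();
    List<RelDataTypeField> outFields = targetRowType.getFieldList();
    for (int i = 0; i < inFields.size(); i++) {
      RexNode ref = rexBuilder.makeInputRef(inFields.get(i).getType(), i);
      RelDataType target = outFields.get(i).getType();
      // Identity columns are passed through; mismatched ones get a cast.
      projs.add(inFields.get(i).getType().equals(target)
          ? ref : rexBuilder.makeCast(target, ref));
    }
    return projs;
  }
}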
@@ -12884,7 +12882,7 @@ private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJ
       }
       // 2. Construct ExpressionNodeDesc representing Join Condition
-      RexNode optiqJoinCond = null;
+      RexNode calciteJoinCond = null;
       if (joinCond != null) {
         JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType);
         Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond,
@@ -12898,10 +12896,10 @@ private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJ
         List<RelNode> inputRels = new ArrayList<RelNode>();
         inputRels.add(leftRel);
         inputRels.add(rightRel);
-        optiqJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels,
-            relToHiveRR, relToHiveColNameOptiqPosMap, false);
+        calciteJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels,
+            relToHiveRR, relToHiveColNameCalcitePosMap, false);
       } else {
-        optiqJoinCond = cluster.getRexBuilder().makeLiteral(true);
+        calciteJoinCond = cluster.getRexBuilder().makeLiteral(true);
       }
       // 3. Validate that join condition is legal (i.e no function refering to
@@ -12911,24 +12909,24 @@
       // 4. Construct Join Rel Node
       boolean leftSemiJoin = false;
-      JoinRelType optiqJoinType;
+      JoinRelType calciteJoinType;
       switch (hiveJoinType) {
       case LEFTOUTER:
-        optiqJoinType = JoinRelType.LEFT;
+        calciteJoinType = JoinRelType.LEFT;
         break;
       case RIGHTOUTER:
-        optiqJoinType = JoinRelType.RIGHT;
+        calciteJoinType = JoinRelType.RIGHT;
         break;
       case FULLOUTER:
-        optiqJoinType = JoinRelType.FULL;
+        calciteJoinType = JoinRelType.FULL;
         break;
       case LEFTSEMI:
-        optiqJoinType = JoinRelType.INNER;
+        calciteJoinType = JoinRelType.INNER;
         leftSemiJoin = true;
         break;
       case INNER:
       default:
-        optiqJoinType = JoinRelType.INNER;
+        calciteJoinType = JoinRelType.INNER;
         break;
       }
@@ -12938,7 +12936,7 @@ private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJ
       List<RexNode> rightJoinKeys = new ArrayList<RexNode>();
       RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel,
-          optiqJoinCond, leftJoinKeys, rightJoinKeys, null, null);
+          calciteJoinCond, leftJoinKeys, rightJoinKeys, null, null);
       if (!nonEquiConds.isAlwaysTrue()) {
         throw new SemanticException("Non equality condition not supported in Semi-Join"
@@ -12948,19 +12946,19 @@ private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJ
         RelNode[] inputRels = new RelNode[] { leftRel, rightRel };
         final List<Integer> leftKeys = new ArrayList<Integer>();
         final List<Integer> rightKeys = new ArrayList<Integer>();
-        optiqJoinCond = HiveOptiqUtil.projectNonColumnEquiConditions(
-            HiveProjectRel.DEFAULT_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0,
+        calciteJoinCond = HiveCalciteUtil.projectNonColumnEquiConditions(
+            HiveProject.DEFAULT_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0,
             leftKeys, rightKeys);
-        joinRel = new SemiJoinRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
-            inputRels[0], inputRels[1], optiqJoinCond, ImmutableIntList.copyOf(leftKeys),
+        joinRel = new SemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+            inputRels[0], inputRels[1], calciteJoinCond, ImmutableIntList.copyOf(leftKeys),
            ImmutableIntList.copyOf(rightKeys));
       } else {
-        joinRel = HiveJoinRel.getJoin(cluster, leftRel, rightRel, optiqJoinCond, optiqJoinType,
+        joinRel = HiveJoin.getJoin(cluster, leftRel, rightRel, calciteJoinCond, calciteJoinType,
            leftSemiJoin);
       }
       // 5. Add new JoinRel & its RR to the maps
-      relToHiveColNameOptiqPosMap.put(joinRel, this.buildHiveToOptiqColumnMap(joinRR, joinRel));
+      relToHiveColNameCalcitePosMap.put(joinRel, this.buildHiveToCalciteColumnMap(joinRR, joinRel));
       relToHiveRR.put(joinRel, joinRR);
       return joinRel;
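The LEFT SEMI JOIN branch above hinges on RelOptUtil.splitJoinCondition, which extracts the equi-join keys and returns whatever it could not decompose. A small sketch of just that check, with the same splitJoinCondition signature the patch calls (filterNulls and rangeOp passed as null); the empty system-field list and the helper name are assumptions for illustration.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;

import org.apache.hadoop.hive.ql.parse.SemanticException;

final class SemiJoinKeysSketch {
  /** Splits joinCond into equi-join keys; rejects any non-equi remainder,
   *  mirroring the bail-out for LEFT SEMI JOIN in the hunk above. */
  static void checkEquiOnly(RelNode left, RelNode right, RexNode joinCond)
      throws SemanticException {
    List<RexNode> leftKeys = new ArrayList<RexNode>();
    List<RexNode> rightKeys = new ArrayList<RexNode>();
    RexNode nonEquiConds = RelOptUtil.splitJoinCondition(
        Collections.<RelDataTypeField>emptyList(), left, right, joinCond,
        leftKeys, rightKeys, null, null);
    if (!nonEquiConds.isAlwaysTrue()) {
      throw new SemanticException(
          "Non equality condition not supported in Semi-Join: " + nonEquiConds);
    }
  }
}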
+ " Currently we don't support Table Sample clauses in CBO," + " turn off cbo for queries on tableSamples.", tableAlias); LOG.debug(msg); - throw new OptiqSemanticException(msg); + throw new CalciteSemanticException(msg); } // 2. Get Table Metadata @@ -13136,13 +13134,13 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc noColsMissingStats); // 5. Build Hive Table Scan Rel - tableRel = new HiveTableScanRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION), optTable, + tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, rowType); // 6. Add Schema(RR) to RelNode-Schema map - ImmutableMap hiveToOptiqColMap = buildHiveToOptiqColumnMap(rr, tableRel); + ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr, tableRel); relToHiveRR.put(tableRel, rr); - relToHiveColNameOptiqPosMap.put(tableRel, hiveToOptiqColMap); + relToHiveColNameCalcitePosMap.put(tableRel, hiveToCalciteColMap); } catch (Exception e) { if (e instanceof SemanticException) { throw (SemanticException) e; @@ -13159,21 +13157,21 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) throws Sema if (filterCondn instanceof ExprNodeConstantDesc && !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { // queries like select * from t1 where 'foo'; - // Optiq's rule PushFilterThroughProject chokes on it. Arguably, we can insert a cast to + // Calcite's rule PushFilterThroughProject chokes on it. Arguably, we can insert a cast to // boolean in such cases, but since Postgres, Oracle and MS SQL server fail on compile time // for such queries, its an arcane corner case, not worth of adding that complexity. - throw new OptiqSemanticException("Filter expression with non-boolean return type."); + throw new CalciteSemanticException("Filter expression with non-boolean return type."); } - ImmutableMap hiveColNameOptiqPosMap = this.relToHiveColNameOptiqPosMap + ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap .get(srcRel); RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(), - hiveColNameOptiqPosMap, 0, true).convert(filterCondn); + hiveColNameCalcitePosMap, 0, true).convert(filterCondn); RexNode factoredFilterExpr = RexUtil.pullFactors(cluster.getRexBuilder(), convertedFilterExpr); - RelNode filterRel = new HiveFilterRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION), + RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), srcRel, factoredFilterExpr); - this.relToHiveColNameOptiqPosMap.put(filterRel, hiveColNameOptiqPosMap); + this.relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); - relToHiveColNameOptiqPosMap.put(filterRel, hiveColNameOptiqPosMap); + relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); return filterRel; } @@ -13187,8 +13185,8 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, * #genFilterPlan} - for now we will support the same behavior as non CBO * route. - but plan to allow nested SubQueries(Restriction.9.m) and * multiple SubQuery expressions(Restriction.8.m). This requires use to - * utilize Optiq's Decorrelation mechanics, and for Optiq to fix/flush out - * Null semantics(OPTIQ-373) - besides only the driving code has been + * utilize Calcite's Decorrelation mechanics, and for Calcite to fix/flush out + * Null semantics(CALCITE-373) - besides only the driving code has been * copied. 
@@ -13187,8 +13185,8 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
      * #genFilterPlan} - for now we will support the same behavior as non CBO
      * route. - but plan to allow nested SubQueries(Restriction.9.m) and
      * multiple SubQuery expressions(Restriction.8.m). This requires use to
-     * utilize Optiq's Decorrelation mechanics, and for Optiq to fix/flush out
-     * Null semantics(OPTIQ-373) - besides only the driving code has been
+     * utilize Calcite's Decorrelation mechanics, and for Calcite to fix/flush out
+     * Null semantics(CALCITE-373) - besides only the driving code has been
      * copied. Most of the code which is SubQueryUtils and QBSubQuery is
      * reused.
      */
@@ -13222,7 +13220,7 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
         RowResolver inputRR = relToHiveRR.get(srcRel);
         RowResolver outerQBRR = inputRR;
         ImmutableMap<String, Integer> outerQBPosMap =
-            relToHiveColNameOptiqPosMap.get(srcRel);
+            relToHiveColNameCalcitePosMap.get(srcRel);
         for (int i = 0; i < subQueries.size(); i++) {
           ASTNode subQueryAST = subQueries.get(i);
@@ -13314,7 +13312,7 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
           }
         }
         relToHiveRR.put(srcRel, outerQBRR);
-        relToHiveColNameOptiqPosMap.put(srcRel, outerQBPosMap);
+        relToHiveColNameCalcitePosMap.put(srcRel, outerQBPosMap);
         return srcRel;
       }
@@ -13326,20 +13324,20 @@ private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws Sema
       RowResolver oRR = new RowResolver();
       RowResolver.add(oRR, iRR, numColumns);
-      List<RexNode> optiqColLst = new ArrayList<RexNode>();
+      List<RexNode> calciteColLst = new ArrayList<RexNode>();
       List<String> oFieldNames = new ArrayList<String>();
       RelDataType iType = srcRel.getRowType();
       for (int i = 0; i < iType.getFieldCount(); i++) {
         RelDataTypeField fType = iType.getFieldList().get(i);
         String fName = iType.getFieldNames().get(i);
-        optiqColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i));
+        calciteColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i));
         oFieldNames.add(fName);
       }
-      HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, oFieldNames);
+      HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames);
-      this.relToHiveColNameOptiqPosMap.put(selRel, buildHiveToOptiqColumnMap(oRR, selRel));
+      this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(oRR, selRel));
       this.relToHiveRR.put(selRel, oRR);
       return selRel;
     }
@@ -13380,11 +13378,11 @@ private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List gbC
         RexNodeConverter converter, HashMap<String, Integer> rexNodeToPosMap,
         Integer childProjLstIndx) throws SemanticException {
-      // 1. Get agg fn ret type in Optiq
+      // 1. Get agg fn ret type in Calcite
       RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType,
           this.cluster.getTypeFactory());
-      // 2. Convert Agg Fn args and type of args to Optiq
+      // 2. Convert Agg Fn args and type of args to Calcite
       // TODO: Does HQL allows expressions as aggregate args or can it only be
       // projections from child?
       Integer inputIndx;
@@ -13407,9 +13405,9 @@ private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List gbC
         aggArgRelDTBldr.add(TypeConverter.convert(expr.getTypeInfo(), dtFactory));
       }
-      // 3. Get Aggregation FN from Optiq given name, ret type and input arg
+      // 3. Get Aggregation FN from Calcite given name, ret type and input arg
       // type
-      final Aggregation aggregation = SqlFunctionConverter.getOptiqAggFn(agg.m_udfName,
+      final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.m_udfName,
          aggArgRelDTBldr.build(), aggFnRetType);
       return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null);
@@ -13418,22 +13416,23 @@
     private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfoLst,
         RelNode srcRel) throws SemanticException {
       RowResolver gbInputRR = this.relToHiveRR.get(srcRel);
-      ImmutableMap<String, Integer> posMap = this.relToHiveColNameOptiqPosMap.get(srcRel);
+      ImmutableMap<String, Integer> posMap = this.relToHiveColNameCalcitePosMap.get(srcRel);
       RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(),
           posMap, 0, false);
       final List<RexNode> gbChildProjLst = Lists.newArrayList();
       final HashMap<String, Integer> rexNodeToPosMap = new HashMap<String, Integer>();
-      final BitSet groupSet = new BitSet();
+      final List<Integer> groupSetPositions = Lists.newArrayList();
       Integer gbIndx = 0;
       RexNode rnd;
       for (ExprNodeDesc key : gbExprs) {
         rnd = converter.convert(key);
         gbChildProjLst.add(rnd);
-        groupSet.set(gbIndx);
+        groupSetPositions.add(gbIndx);
         rexNodeToPosMap.put(rnd.toString(), gbIndx);
         gbIndx++;
       }
+      final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
       List<AggregateCall> aggregateCalls = Lists.newArrayList();
       int i = aggInfoLst.size();
@@ -13447,12 +13446,12 @@ private RelNode genGBRelNode(List gbExprs, List aggInfoLs
         // first element from srcRel
         gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0));
       }
-      RelNode gbInputRel = HiveProjectRel.create(srcRel, gbChildProjLst, null);
+      RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null);
-      HiveRel aggregateRel = null;
+      HiveRelNode aggregateRel = null;
       try {
-        aggregateRel = new HiveAggregateRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
-            gbInputRel, groupSet, aggregateCalls);
+        aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+            gbInputRel, false, groupSet, null, aggregateCalls);
       } catch (InvalidRelException e) {
         throw new SemanticException(e);
       }
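The genGBRelNode hunk above shows the one real API change in this area: pre-1.0 aggregates took a mutable java.util.BitSet, while Calcite 1.0 takes an ImmutableBitSet plus the new indicator/groupSets arguments (passed as false/null since Hive bails out of CBO for GROUPING SETS). A tiny sketch of just the group-set construction; ImmutableBitSet.of(Iterable) is the call the patch itself uses, the helper around it is illustrative.

import java.util.List;

import org.apache.calcite.util.ImmutableBitSet;

import com.google.common.collect.Lists;

final class GroupSetSketch {
  /** Positions 0..numGbKeys-1 of the group-by keys as an ImmutableBitSet,
   *  matching how groupSetPositions is accumulated in the hunk above. */
  static ImmutableBitSet groupSet(int numGbKeys) {
    List<Integer> positions = Lists.newArrayList();
    for (int i = 0; i < numGbKeys; i++) {
      positions.add(i);
    }
    return ImmutableBitSet.of(positions);
  }
}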
@@ -13592,7 +13591,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
       RelNode gbRel = null;
       QBParseInfo qbp = getQBParseInfo(qb);
-      // 0. for GSets, Cube, Rollup, bail from Optiq path.
+      // 0. for GSets, Cube, Rollup, bail from Calcite path.
       if (!qbp.getDestRollups().isEmpty()
           || !qbp.getDestGroupingSets().isEmpty()
           || !qbp.getDestCubes().isEmpty()) {
@@ -13612,7 +13611,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
             + " clauses in CBO," + " turn off cbo for these queries.", gbyClause);
         LOG.debug(msg);
-        throw new OptiqSemanticException(msg);
+        throw new CalciteSemanticException(msg);
       }
       // 1. Gather GB Expressions (AST) (GB + Aggregations)
@@ -13641,7 +13640,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
             grpbyExpr, new TypeCheckCtx(groupByInputRowResolver));
         ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr);
         if (grpbyExprNDesc == null)
-          throw new OptiqSemanticException("Invalid Column Reference: " + grpbyExpr.dump());
+          throw new CalciteSemanticException("Invalid Column Reference: " + grpbyExpr.dump());
         addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr,
             grpbyExprNDesc, gbExprNDescLst, outputColumnNames);
@@ -13682,8 +13681,8 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
         }
         gbRel = genGBRelNode(gbExprNDescLst, aggregations, srcRel);
-        relToHiveColNameOptiqPosMap.put(gbRel,
-            buildHiveToOptiqColumnMap(groupByOutputRowResolver, gbRel));
+        relToHiveColNameCalcitePosMap.put(gbRel,
+            buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel));
         this.relToHiveRR.put(gbRel, groupByOutputRowResolver);
       }
@@ -13735,7 +13734,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
       RexNode rnd;
       RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(),
-          relToHiveColNameOptiqPosMap.get(srcRel), 0, false);
+          relToHiveColNameCalcitePosMap.get(srcRel), 0, false);
       int srcRelRecordSz = srcRel.getRowType().getFieldCount();
       for (int i = 0; i < obASTExprLst.size(); i++) {
@@ -13751,7 +13750,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
         rnd = converter.convert(obExprNDesc);
         // 2.3 Determine the index of ob expr in child schema
-        // NOTE: Optiq can not take compound exprs in OB without it being
+        // NOTE: Calcite can not take compound exprs in OB without it being
         // present in the child (& hence we add a child Project Rel)
         if (rnd instanceof RexInputRef) {
           fieldIndex = ((RexInputRef) rnd).getIndex();
@@ -13763,7 +13762,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
         }
         // 2.4 Determine the Direction of order by
-        org.eigenbase.rel.RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING;
+        org.apache.calcite.rel.RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING;
         if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
           order = RelFieldCollation.Direction.ASCENDING;
         }
@@ -13785,7 +13784,7 @@ public RexNode apply(RelDataTypeField input) {
          });
           RowResolver obSyntheticProjectRR = new RowResolver();
           if (!RowResolver.add(obSyntheticProjectRR, inputRR)) {
-            throw new OptiqSemanticException(
+            throw new CalciteSemanticException(
                 "Duplicates detected when adding columns to RR: see previous message");
           }
           int vcolPos = inputRR.getRowSchema().getSignature().size();
@@ -13799,28 +13798,28 @@ public RexNode apply(RelDataTypeField input) {
           if (outermostOB) {
            if (!RowResolver.add(outputRR, inputRR)) {
-              throw new OptiqSemanticException(
+              throw new CalciteSemanticException(
                  "Duplicates detected when adding columns to RR: see previous message");
            }
          } else {
            if (!RowResolver.add(outputRR, obSyntheticProjectRR)) {
-              throw new OptiqSemanticException(
+              throw new CalciteSemanticException(
                  "Duplicates detected when adding columns to RR: see previous message");
            }
            originalOBChild = srcRel;
          }
        } else {
          if (!RowResolver.add(outputRR, inputRR)) {
-            throw new OptiqSemanticException(
+            throw new CalciteSemanticException(
                "Duplicates detected when adding columns to RR: see previous message");
          }
        }
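The 2.3/2.4 steps above resolve each ORDER BY expression to an input field index (compound expressions get a synthetic Project first) and a direction. Each pair then becomes a Calcite RelFieldCollation, the building blocks of the collation canonized in the next hunk. The sketch below shows only that last translation; RelFieldCollation and its Direction enum are Calcite's, the helper shape is illustrative.

import java.util.ArrayList;
import java.util.List;

import org.apache.calcite.rel.RelFieldCollation;

final class ObCollationSketch {
  /** One RelFieldCollation per ORDER BY key: resolved field index plus
   *  ASCENDING/DESCENDING, in query order. */
  static List<RelFieldCollation> toCollations(List<Integer> fieldIndexes,
      List<Boolean> ascending) {
    List<RelFieldCollation> collations = new ArrayList<RelFieldCollation>();
    for (int i = 0; i < fieldIndexes.size(); i++) {
      RelFieldCollation.Direction dir = ascending.get(i)
          ? RelFieldCollation.Direction.ASCENDING
          : RelFieldCollation.Direction.DESCENDING;
      collations.add(new RelFieldCollation(fieldIndexes.get(i), dir));
    }
    return collations;
  }
}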
       // 4. Construct SortRel
-      RelTraitSet traitSet = cluster.traitSetOf(HiveRel.CONVENTION);
+      RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
       RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations));
-      sortRel = new HiveSortRel(cluster, traitSet, obInputRel, canonizedCollation, null, null);
+      sortRel = new HiveSort(cluster, traitSet, obInputRel, canonizedCollation, null, null);
       // 5. Update the maps
       // NOTE: Output RR for SortRel is considered same as its input; we may
@@ -13828,35 +13827,35 @@ public RexNode apply(RelDataTypeField input) {
       // rowtype of sortrel is the type of it child; if child happens to be
       // synthetic project that we introduced then that projectrel would
       // contain the vc.
-      ImmutableMap<String, Integer> hiveColNameOptiqPosMap = buildHiveToOptiqColumnMap(outputRR,
+      ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR,
           sortRel);
       relToHiveRR.put(sortRel, outputRR);
-      relToHiveColNameOptiqPosMap.put(sortRel, hiveColNameOptiqPosMap);
+      relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
       }
       return (new Pair<RelNode, RelNode>(sortRel, originalOBChild));
     }
     private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
-      HiveRel sortRel = null;
+      HiveRelNode sortRel = null;
       QBParseInfo qbp = getQBParseInfo(qb);
       Integer limit = qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
       if (limit != null) {
         RexNode fetch = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit));
-        RelTraitSet traitSet = cluster.traitSetOf(HiveRel.CONVENTION);
+        RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
         RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.EMPTY);
-        sortRel = new HiveSortRel(cluster, traitSet, srcRel, canonizedCollation, null, fetch);
+        sortRel = new HiveSort(cluster, traitSet, srcRel, canonizedCollation, null, fetch);
         RowResolver outputRR = new RowResolver();
         if (!RowResolver.add(outputRR, relToHiveRR.get(srcRel))) {
-          throw new OptiqSemanticException(
+          throw new CalciteSemanticException(
              "Duplicates detected when adding columns to RR: see previous message");
         }
-        ImmutableMap<String, Integer> hiveColNameOptiqPosMap = buildHiveToOptiqColumnMap(outputRR,
+        ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR,
            sortRel);
         relToHiveRR.put(sortRel, outputRR);
-        relToHiveColNameOptiqPosMap.put(sortRel, hiveColNameOptiqPosMap);
+        relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
       }
       return sortRel;
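As the genLimitLogicalPlan hunk shows, LIMIT in this translation is just a Sort with an empty collation and a fetch expression: the integer limit becomes an exact-numeric RexNode literal, and the offset slot is passed as null. The one-liner below isolates that conversion; makeExactLiteral is the same Calcite call the patch uses, only the wrapper is illustrative.

import java.math.BigDecimal;

import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;

final class LimitFetchSketch {
  /** LIMIT n as a RexNode fetch literal for a Sort (offset stays null). */
  static RexNode fetchLiteral(RexBuilder rexBuilder, int limit) {
    return rexBuilder.makeExactLiteral(BigDecimal.valueOf(limit));
  }
}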
@@ -13969,29 +13968,29 @@ int getWindowSpecIndx(ASTNode wndAST) {
       AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1,
           this.relToHiveRR.get(srcRel));
-      // 3. Get Optiq Return type for Agg Fn
+      // 3. Get Calcite Return type for Agg Fn
       wHiveRetType = hiveAggInfo.m_returnType;
-      RelDataType optiqAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType,
+      RelDataType calciteAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType,
           this.cluster.getTypeFactory());
-      // 4. Convert Agg Fn args to Optiq
-      ImmutableMap<String, Integer> posMap = this.relToHiveColNameOptiqPosMap.get(srcRel);
+      // 4. Convert Agg Fn args to Calcite
+      ImmutableMap<String, Integer> posMap = this.relToHiveColNameCalcitePosMap.get(srcRel);
       RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(),
           posMap, 0, false);
-      Builder<RexNode> optiqAggFnArgsBldr = ImmutableList.<RexNode> builder();
-      Builder<RelDataType> optiqAggFnArgsTypeBldr = ImmutableList.<RelDataType> builder();
+      Builder<RexNode> calciteAggFnArgsBldr = ImmutableList.<RexNode> builder();
+      Builder<RelDataType> calciteAggFnArgsTypeBldr = ImmutableList.<RelDataType> builder();
       RexNode rexNd = null;
       for (int i = 0; i < hiveAggInfo.m_aggParams.size(); i++) {
-        optiqAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i)));
-        optiqAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i)
+        calciteAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i)));
+        calciteAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i)
            .getTypeInfo(), this.cluster.getTypeFactory()));
       }
-      ImmutableList<RexNode> optiqAggFnArgs = optiqAggFnArgsBldr.build();
-      ImmutableList<RelDataType> optiqAggFnArgsType = optiqAggFnArgsTypeBldr.build();
+      ImmutableList<RexNode> calciteAggFnArgs = calciteAggFnArgsBldr.build();
+      ImmutableList<RelDataType> calciteAggFnArgsType = calciteAggFnArgsTypeBldr.build();
-      // 5. Get Optiq Agg Fn
-      final SqlAggFunction optiqAggFn = SqlFunctionConverter.getOptiqAggFn(hiveAggInfo.m_udfName,
-          optiqAggFnArgsType, optiqAggFnRetType);
+      // 5. Get Calcite Agg Fn
+      final SqlAggFunction calciteAggFn = SqlFunctionConverter.getCalciteAggFn(hiveAggInfo.m_udfName,
+          calciteAggFnArgsType, calciteAggFnRetType);
       // 6. Translate Window spec
       RowResolver inputRR = relToHiveRR.get(srcRel);
@@ -14003,7 +14002,7 @@ int getWindowSpecIndx(ASTNode wndAST) {
       boolean isRows = ((wndSpec.windowFrame.start instanceof RangeBoundarySpec)
          || (wndSpec.windowFrame.end instanceof RangeBoundarySpec)) ? true : false;
-      w = cluster.getRexBuilder().makeOver(optiqAggFnRetType, optiqAggFn, optiqAggFnArgs,
+      w = cluster.getRexBuilder().makeOver(calciteAggFnRetType, calciteAggFn, calciteAggFnArgs,
          partitionKeys, ImmutableList.<RexFieldCollation> copyOf(orderKeys), lowerBound,
          upperBound, isRows, true, false);
     } else {
@@ -14028,7 +14027,7 @@ private RelNode genSelectForWindowing(
       RowResolver inputRR = this.relToHiveRR.get(srcRel);
       // 2. Get RexNodes for original Projections from below
       List<RexNode> projsForWindowSelOp = new ArrayList<RexNode>(
-          HiveOptiqUtil.getProjsFromBelowAsInputRef(srcRel));
+          HiveCalciteUtil.getProjsFromBelowAsInputRef(srcRel));
       // 3. Construct new Row Resolver with everything from below.
       RowResolver out_rwsch = new RowResolver();
@@ -14058,15 +14057,15 @@ private RelNode genSelectForWindowing(
       return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel);
     }
-    private RelNode genSelectRelNode(List<RexNode> optiqColLst, RowResolver out_rwsch,
-        RelNode srcRel) throws OptiqSemanticException {
+    private RelNode genSelectRelNode(List<RexNode> calciteColLst, RowResolver out_rwsch,
+        RelNode srcRel) throws CalciteSemanticException {
       // 1. Build Column Names
       Set<String> colNamesSet = new HashSet<String>();
       List<ColumnInfo> cInfoLst = out_rwsch.getRowSchema().getSignature();
       ArrayList<String> columnNames = new ArrayList<String>();
       String[] qualifiedColNames;
       String tmpColAlias;
-      for (int i = 0; i < optiqColLst.size(); i++) {
+      for (int i = 0; i < calciteColLst.size(); i++) {
         ColumnInfo cInfo = cInfoLst.get(i);
         qualifiedColNames = out_rwsch.reverseLookup(cInfo.getInternalName());
         /*
@@ -14094,23 +14093,23 @@ private RelNode genSelectRelNode(List optiqColLst, RowResolver out_rwsc
         columnNames.add(tmpColAlias);
       }
-      // 3 Build Optiq Rel Node for project using converted projections & col
+      // 3 Build Calcite Rel Node for project using converted projections & col
       // names
-      HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, columnNames);
+      HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, columnNames);
       // 4. Keep track of colname-to-posmap && RR for new select
-      this.relToHiveColNameOptiqPosMap.put(selRel, buildHiveToOptiqColumnMap(out_rwsch, selRel));
+      this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel));
       this.relToHiveRR.put(selRel, out_rwsch);
       return selRel;
     }
-    private RelNode genSelectRelNode(List<RexNode> optiqColLst, RowResolver out_rwsch,
-        RelNode srcRel, boolean removethismethod) throws OptiqSemanticException {
+    private RelNode genSelectRelNode(List<RexNode> calciteColLst, RowResolver out_rwsch,
+        RelNode srcRel, boolean removethismethod) throws CalciteSemanticException {
       // 1. Build Column Names
       // TODO: Should this be external names
       ArrayList<String> columnNames = new ArrayList<String>();
-      for (int i = 0; i < optiqColLst.size(); i++) {
+      for (int i = 0; i < calciteColLst.size(); i++) {
         columnNames.add(getColumnInternalName(i));
       }
@@ -14126,12 +14125,12 @@ public String apply(String hName) {
         }
       });
-      // 3 Build Optiq Rel Node for project using converted projections & col
+      // 3 Build Calcite Rel Node for project using converted projections & col
       // names
-      HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, oFieldNames);
+      HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames);
       // 4. Keep track of colname-to-posmap && RR for new select
-      this.relToHiveColNameOptiqPosMap.put(selRel, buildHiveToOptiqColumnMap(out_rwsch, selRel));
+      this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel));
       this.relToHiveRR.put(selRel, out_rwsch);
       return selRel;
@@ -14183,7 +14182,7 @@ private RelNode genSelectLogicalPlan(
           String msg = String.format("Hint specified for %s."
               + " Currently we don't support hints in CBO, turn off cbo to use hints.", hint);
           LOG.debug(msg);
-          throw new OptiqSemanticException(msg);
+          throw new CalciteSemanticException(msg);
         }
       // 4. Bailout if select involves Transform
@@ -14192,7 +14191,7 @@ private RelNode genSelectLogicalPlan(
         String msg = String.format("SELECT TRANSFORM is currently not supported in CBO,"
             + " turn off cbo to use TRANSFORM.");
         LOG.debug(msg);
-        throw new OptiqSemanticException(msg);
+        throw new CalciteSemanticException(msg);
       }
       // 5. Bailout if select involves UDTF
@@ -14205,7 +14204,7 @@ private RelNode genSelectLogicalPlan(
           String msg = String.format("UDTF " + funcName + " is currently not supported in CBO,"
              + " turn off cbo to use UDTF " + funcName);
           LOG.debug(msg);
-          throw new OptiqSemanticException(msg);
+          throw new CalciteSemanticException(msg);
         }
       }
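The naming loop in genSelectRelNode above (colNamesSet, tmpColAlias) exists because projected internal names can collide or contain characters the AST converter cannot digest, so aliases are adjusted until unique. The helper below sketches only the uniquification idea; the suffixing policy shown is an assumption for illustration, not Hive's exact rule.

import java.util.Set;

final class ColAliasSketch {
  /** Returns candidate if unused, else candidate_1, candidate_2, ... The
   *  taken set records every alias handed out so far. */
  static String uniqueColAlias(String candidate, Set<String> taken) {
    String alias = candidate;
    int suffix = 1;
    while (!taken.add(alias)) {   // Set.add returns false on a duplicate.
      alias = candidate + "_" + suffix++;
    }
    return alias;
  }
}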
+ colAlias + " => " + colInfo + " due to duplication, see previous warnings"); } @@ -14305,16 +14304,16 @@ private RelNode genSelectLogicalPlan( } selectStar = selectStar && exprList.getChildCount() == posn + 1; - // 7. Convert Hive projections to Optiq - List optiqColLst = new ArrayList(); + // 7. Convert Hive projections to Calcite + List calciteColLst = new ArrayList(); RexNodeConverter rexNodeConv = new RexNodeConverter(cluster, srcRel.getRowType(), buildHiveColNameToInputPosMap(col_list, inputRR), 0, false); for (ExprNodeDesc colExpr : col_list) { - optiqColLst.add(rexNodeConv.convert(colExpr)); + calciteColLst.add(rexNodeConv.convert(colExpr)); } - // 8. Build Optiq Rel - RelNode selRel = genSelectRelNode(optiqColLst, out_rwsch, srcRel); + // 8. Build Calcite Rel + RelNode selRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel); return selRel; } @@ -14355,7 +14354,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept if (LOG.isDebugEnabled()) { LOG.debug(msg + " because it: " + reason); } - throw new OptiqSemanticException(msg); + throw new CalciteSemanticException(msg); } // 1. Build Rel For Src (SubQuery, TS, Join) @@ -14374,7 +14373,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept if (aliasToRel.isEmpty()) { // // This may happen for queries like select 1; (no source table) // We can do following which is same, as what Hive does. - // With this, we will be able to generate Optiq plan. + // With this, we will be able to generate Calcite plan. // qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable()); // RelNode op = genTableLogicalPlan(DUMMY_TABLE, qb); // qb.addAlias(DUMMY_TABLE); @@ -14384,7 +14383,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept // table // So, for now lets just disable this. Anyway there is nothing much to // optimize in such cases. - throw new OptiqSemanticException("Unsupported"); + throw new CalciteSemanticException("Unsupported"); } // 1.3 process join @@ -14424,7 +14423,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept // 8. Introduce top constraining select if needed. // NOTES: - // 1. Optiq can not take an expr in OB; hence it needs to be added as VC + // 1. Calcite can not take an expr in OB; hence it needs to be added as VC // in the input select; In such cases we need to introduce a select on top // to ensure VC is not visible beyond Limit, OB. // 2. Hive can not preserve order across select. In subqueries OB is used @@ -14435,8 +14434,8 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept // limitation(#2) stated above. The RR for OB will not include VC. Thus // Result Schema will not include exprs used by top OB. During AST Conv, // in the PlanModifierForASTConv we would modify the top level OB to - // migrate exprs from input sel to SortRel (Note that Optiq doesn't - // support this; but since we are done with Optiq at this point its OK). + // migrate exprs from input sel to SortRel (Note that Calcite doesn't + // support this; but since we are done with Calcite at this point its OK). 
       if (topConstrainingProjArgsRel != null) {
         List<RexNode> originalInputRefs = Lists.transform(topConstrainingProjArgsRel.getRowType()
             .getFieldList(), new Function<RelDataTypeField, RexNode>() {
@@ -14472,7 +14471,7 @@ public RexNode apply(RelDataTypeField input) {
           newRR.put(alias, tmp[1], newCi);
         }
         relToHiveRR.put(srcRel, newRR);
-        relToHiveColNameOptiqPosMap.put(srcRel, buildHiveToOptiqColumnMap(newRR, srcRel));
+        relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR, srcRel));
       }
       if (LOG.isDebugEnabled()) {
@@ -14489,9 +14488,9 @@ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map
-        throw new OptiqSemanticException("Having clause without any group-by.");
+        throw new CalciteSemanticException("Having clause without any group-by.");
       }
       validateNoHavingReferenceToAlias(qb, (ASTNode) havingClause.getChild(0));
       gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel,
@@ -14505,10 +14504,10 @@ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map
       Map<ASTNode, String> exprToAlias = qbPI.getAllExprToColumnAlias();
@@ -14560,13 +14559,13 @@ public Object post(Object t) {
               + " Turn off cbo for these queries.", aliasToCheck, havingClause);
           LOG.debug(msg);
-          throw new OptiqSemanticException(msg);
+          throw new CalciteSemanticException(msg);
         }
       }
     }
-    private ImmutableMap<String, Integer> buildHiveToOptiqColumnMap(RowResolver rr, RelNode rNode) {
+    private ImmutableMap<String, Integer> buildHiveToCalciteColumnMap(RowResolver rr, RelNode rNode) {
       ImmutableMap.Builder<String, Integer> b = new ImmutableMap.Builder<String, Integer>();
       int i = 0;
       for (ColumnInfo ci : rr.getRowSchema().getSignature()) {
@@ -14591,13 +14590,13 @@ public Object post(Object t) {
       return hiveColNameToInputPosMapBuilder.build();
     }
-    private QBParseInfo getQBParseInfo(QB qb) throws OptiqSemanticException {
+    private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
       QBParseInfo qbp = qb.getParseInfo();
       if (qbp.getClauseNames().size() > 1) {
         String msg = String.format("Multi Insert is currently not supported in CBO,"
             + " turn off cbo to use Multi Insert.");
         LOG.debug(msg);
-        throw new OptiqSemanticException(msg);
+        throw new CalciteSemanticException(msg);
       }
       return qbp;
     }