diff --git pom.xml pom.xml
index 688a12f..c07f692 100644
--- pom.xml
+++ pom.xml
@@ -99,7 +99,7 @@
3.4
1.7.5
0.8.0.RELEASE
- <calcite.version>0.9.2-incubating</calcite.version>
+ <calcite.version>1.0.0-incubating-SNAPSHOT</calcite.version>
3.2.6
3.2.10
3.2.9
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
new file mode 100644
index 0000000..a71cd35
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Semantic exception raised by the Calcite-based planner.
+ */
+
+public class CalciteSemanticException extends SemanticException {
+
+ private static final long serialVersionUID = 1L;
+
+ public CalciteSemanticException() {
+ super();
+ }
+
+ public CalciteSemanticException(String message) {
+ super(message);
+ }
+
+ public CalciteSemanticException(Throwable cause) {
+ super(cause);
+ }
+
+ public CalciteSemanticException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public CalciteSemanticException(ErrorMsg errorMsg, String... msgArgs) {
+ super(errorMsg, msgArgs);
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
new file mode 100644
index 0000000..6d1e85b
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -0,0 +1,530 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.RelFactories.ProjectFactory;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.validate.SqlValidatorUtil;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+
+/**
+ * Generic utility functions needed for Calcite based Hive CBO.
+ */
+
+public class HiveCalciteUtil {
+
+ /**
+ * Get list of virtual columns from the given list of projections.
+ *
+ * @param exps
+ * list of rex nodes representing projections
+ * @return List of Virtual Columns, will not be null.
+ */
+ public static List<Integer> getVirtualCols(List<? extends RexNode> exps) {
+ List<Integer> vCols = new ArrayList<Integer>();
+
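+ // Anything that is not a direct column reference (RexInputRef) -- e.g. a
+ // computed expression -- is counted as a virtual column.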
+ for (int i = 0; i < exps.size(); i++) {
+ if (!(exps.get(i) instanceof RexInputRef)) {
+ vCols.add(i);
+ }
+ }
+
+ return vCols;
+ }
+
+ public static boolean validateASTForUnsupportedTokens(ASTNode ast) {
+ String astTree = ast.toStringTree();
+ // if any of following tokens are present in AST, bail out
+ String[] tokens = { "TOK_CHARSETLITERAL","TOK_TABLESPLITSAMPLE" };
+ for (String token : tokens) {
+ if (astTree.contains(token)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public static List<RexNode> getProjsFromBelowAsInputRef(final RelNode rel) {
+ List<RexNode> projectList = Lists.transform(rel.getRowType().getFieldList(),
+ new Function<RelDataTypeField, RexNode>() {
+ @Override
+ public RexNode apply(RelDataTypeField field) {
+ return rel.getCluster().getRexBuilder().makeInputRef(field.getType(), field.getIndex());
+ }
+ });
+ return projectList;
+ }
+
+ public static List<Integer> translateBitSetToProjIndx(ImmutableBitSet projBitSet) {
+ List<Integer> projIndxLst = new ArrayList<Integer>();
+
+ for (int i = 0; i < projBitSet.length(); i++) {
+ if (projBitSet.get(i)) {
+ projIndxLst.add(i);
+ }
+ }
+
+ return projIndxLst;
+ }
+
+ /**
+ * Push any equi join conditions that are not column references as Projections
+ * on top of the children.
+ *
+ * @param factory
+ * Project factory to use.
+ * @param inputRels
+ * inputs to a join
+ * @param leftJoinKeys
+ * expressions for LHS of join key
+ * @param rightJoinKeys
+ * expressions for RHS of join key
+ * @param systemColCount
+ * number of system columns, usually zero. These columns are
+ * projected at the leading edge of the output row.
+ * @param leftKeys
+ * on return this contains the join key positions from the new
+ * project rel on the LHS.
+ * @param rightKeys
+ * on return this contains the join key positions from the new
+ * project rel on the RHS.
+ * @return the join condition after the equi expressions pushed down.
+ */
+ public static RexNode projectNonColumnEquiConditions(ProjectFactory factory, RelNode[] inputRels,
+ List<RexNode> leftJoinKeys, List<RexNode> rightJoinKeys, int systemColCount,
+ List<Integer> leftKeys, List<Integer> rightKeys) {
+ RelNode leftRel = inputRels[0];
+ RelNode rightRel = inputRels[1];
+ RexBuilder rexBuilder = leftRel.getCluster().getRexBuilder();
+ RexNode outJoinCond = null;
+
+ int origLeftInputSize = leftRel.getRowType().getFieldCount();
+ int origRightInputSize = rightRel.getRowType().getFieldCount();
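+ // Row layout assumed by the offset arithmetic below (illustrative):
+ // [system cols][orig left fields][appended left keys][orig right fields][appended right keys]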
+
+ List<RexNode> newLeftFields = new ArrayList<RexNode>();
+ List<String> newLeftFieldNames = new ArrayList<String>();
+
+ List<RexNode> newRightFields = new ArrayList<RexNode>();
+ List<String> newRightFieldNames = new ArrayList<String>();
+ int leftKeyCount = leftJoinKeys.size();
+ int i;
+
+ for (i = 0; i < origLeftInputSize; i++) {
+ final RelDataTypeField field = leftRel.getRowType().getFieldList().get(i);
+ newLeftFields.add(rexBuilder.makeInputRef(field.getType(), i));
+ newLeftFieldNames.add(field.getName());
+ }
+
+ for (i = 0; i < origRightInputSize; i++) {
+ final RelDataTypeField field = rightRel.getRowType().getFieldList().get(i);
+ newRightFields.add(rexBuilder.makeInputRef(field.getType(), i));
+ newRightFieldNames.add(field.getName());
+ }
+
+ int newKeyCount = 0;
+ List<Pair<Integer, Integer>> origColEqConds = new ArrayList<Pair<Integer, Integer>>();
+ for (i = 0; i < leftKeyCount; i++) {
+ RexNode leftKey = leftJoinKeys.get(i);
+ RexNode rightKey = rightJoinKeys.get(i);
+
+ if (leftKey instanceof RexInputRef && rightKey instanceof RexInputRef) {
+ origColEqConds.add(Pair.of(((RexInputRef) leftKey).getIndex(),
+ ((RexInputRef) rightKey).getIndex()));
+ } else {
+ newLeftFields.add(leftKey);
+ newLeftFieldNames.add(null);
+ newRightFields.add(rightKey);
+ newRightFieldNames.add(null);
+ newKeyCount++;
+ }
+ }
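+ // At this point every non-column key (e.g., illustratively, an expression
+ // such as l.a + 1) has been appended to the new projection lists; plain
+ // column equalities are rewritten against the new offsets below.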
+
+ for (i = 0; i < origColEqConds.size(); i++) {
+ Pair<Integer, Integer> p = origColEqConds.get(i);
+ RexNode leftKey = leftJoinKeys.get(i);
+ RexNode rightKey = rightJoinKeys.get(i);
+ leftKeys.add(p.left);
+ rightKeys.add(p.right);
+ RexNode cond = rexBuilder.makeCall(
+ SqlStdOperatorTable.EQUALS,
+ rexBuilder.makeInputRef(leftKey.getType(), systemColCount + p.left),
+ rexBuilder.makeInputRef(rightKey.getType(), systemColCount + origLeftInputSize
+ + newKeyCount + p.right));
+ if (outJoinCond == null) {
+ outJoinCond = cond;
+ } else {
+ outJoinCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, outJoinCond, cond);
+ }
+ }
+
+ if (newKeyCount == 0) {
+ return outJoinCond;
+ }
+
+ int newLeftOffset = systemColCount + origLeftInputSize;
+ int newRightOffset = systemColCount + origLeftInputSize + origRightInputSize + newKeyCount;
+ for (i = 0; i < newKeyCount; i++) {
+ leftKeys.add(origLeftInputSize + i);
+ rightKeys.add(origRightInputSize + i);
+ RexNode cond = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
+ rexBuilder.makeInputRef(newLeftFields.get(i).getType(), newLeftOffset + i),
+ rexBuilder.makeInputRef(newLeftFields.get(i).getType(), newRightOffset + i));
+ if (outJoinCond == null) {
+ outJoinCond = cond;
+ } else {
+ outJoinCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, outJoinCond, cond);
+ }
+ }
+
+ // add a Project if new key expressions need to be produced beyond the
+ // original input fields
+ if (newKeyCount > 0) {
+ leftRel = factory.createProject(leftRel, newLeftFields,
+ SqlValidatorUtil.uniquify(newLeftFieldNames));
+ rightRel = factory.createProject(rightRel, newRightFields,
+ SqlValidatorUtil.uniquify(newRightFieldNames));
+ }
+
+ inputRels[0] = leftRel;
+ inputRels[1] = rightRel;
+
+ return outJoinCond;
+ }
+
+ /**
+ * JoinPredicateInfo represents a join condition; JoinPredicateInfo uses
+ * JoinLeafPredicateInfo to represent individual conjunctive elements in the
+ * predicate.
+ * JoinPredicateInfo = JoinLeafPredicateInfo1 and JoinLeafPredicateInfo2...
+ *
+ * JoinPredicateInfo:
+ * 1. Preserves the order of conjunctive elements for
+ * equi-join (equiJoinPredicateElements)
+ * 2. Stores the sets of projection indexes from the left and right children
+ * that are part of the equi-join keys; the indexes are in both the child and
+ * the Join node schema.
+ * 3. Keeps a map from projection indexes that are part of join keys to the
+ * list of conjunctive elements (JoinLeafPredicateInfo) that use them.
+ *
+ */
+ public static class JoinPredicateInfo {
+ private final ImmutableList<JoinLeafPredicateInfo> nonEquiJoinPredicateElements;
+ private final ImmutableList<JoinLeafPredicateInfo> equiJoinPredicateElements;
+ private final ImmutableSet<Integer> projsFromLeftPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> projsFromRightPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> projsFromRightPartOfJoinKeysInJoinSchema;
+ private final ImmutableMap<Integer, ImmutableList<JoinLeafPredicateInfo>> mapOfProjIndxInJoinSchemaToLeafPInfo;
+
+ public JoinPredicateInfo(List<JoinLeafPredicateInfo> nonEquiJoinPredicateElements,
+ List<JoinLeafPredicateInfo> equiJoinPredicateElements,
+ Set<Integer> projsFromLeftPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema,
+ Map<Integer, ImmutableList<JoinLeafPredicateInfo>> mapOfProjIndxInJoinSchemaToLeafPInfo) {
+ this.nonEquiJoinPredicateElements = ImmutableList.copyOf(nonEquiJoinPredicateElements);
+ this.equiJoinPredicateElements = ImmutableList.copyOf(equiJoinPredicateElements);
+ this.projsFromLeftPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromLeftPartOfJoinKeysInChildSchema);
+ this.projsFromRightPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInChildSchema);
+ this.projsFromRightPartOfJoinKeysInJoinSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInJoinSchema);
+ this.mapOfProjIndxInJoinSchemaToLeafPInfo = ImmutableMap
+ .copyOf(mapOfProjIndxInJoinSchemaToLeafPInfo);
+ }
+
+ public List<JoinLeafPredicateInfo> getNonEquiJoinPredicateElements() {
+ return this.nonEquiJoinPredicateElements;
+ }
+
+ public List<JoinLeafPredicateInfo> getEquiJoinPredicateElements() {
+ return this.equiJoinPredicateElements;
+ }
+
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInChildSchema() {
+ return this.projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInChildSchema() {
+ return this.projsFromRightPartOfJoinKeysInChildSchema;
+ }
+
+ /**
+ * NOTE: Join Schema = left Schema + (right Schema offset by
+ * left.fieldcount). Hence it is OK to return projections from the left in
+ * child schema.
+ */
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInJoinSchema() {
+ return this.projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInJoinSchema() {
+ return this.projsFromRightPartOfJoinKeysInJoinSchema;
+ }
+
+ public Map<Integer, ImmutableList<JoinLeafPredicateInfo>> getMapOfProjIndxToLeafPInfo() {
+ return this.mapOfProjIndxInJoinSchemaToLeafPInfo;
+ }
+
+ public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoin j) {
+ return constructJoinPredicateInfo(j, j.getCondition());
+ }
+
+ public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoin j, RexNode predicate) {
+ JoinPredicateInfo jpi = null;
+ JoinLeafPredicateInfo jlpi = null;
+ List<JoinLeafPredicateInfo> equiLPIList = new ArrayList<JoinLeafPredicateInfo>();
+ List<JoinLeafPredicateInfo> nonEquiLPIList = new ArrayList<JoinLeafPredicateInfo>();
+ Set<Integer> projsFromLeftPartOfJoinKeys = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeys = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema = new HashSet<Integer>();
+ Map<Integer, List<JoinLeafPredicateInfo>> tmpMapOfProjIndxInJoinSchemaToLeafPInfo = new HashMap<Integer, List<JoinLeafPredicateInfo>>();
+ Map<Integer, ImmutableList<JoinLeafPredicateInfo>> mapOfProjIndxInJoinSchemaToLeafPInfo = new HashMap<Integer, ImmutableList<JoinLeafPredicateInfo>>();
+ List<JoinLeafPredicateInfo> tmpJLPILst = null;
+ int rightOffSet = j.getLeft().getRowType().getFieldCount();
+ int projIndxInJoin;
+ List<RexNode> conjunctiveElements;
+
+ // 1. Decompose the join condition into a number of leaf predicates
+ // (conjunctive elements)
+ conjunctiveElements = RelOptUtil.conjunctions(predicate);
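+ // e.g. (illustrative) "l.a = r.x AND l.b < r.y" decomposes into two leaf
+ // predicates: the equality and the inequality.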
+
+ // 2. Walk through leaf predicates building up JoinLeafPredicateInfo
+ for (RexNode ce : conjunctiveElements) {
+ // 2.1 Construct JoinLeafPredicateInfo
+ jlpi = JoinLeafPredicateInfo.constructJoinLeafPredicateInfo(j, ce);
+
+ // 2.2 Classify leaf predicate as Equi vs Non Equi
+ if (jlpi.comparisonType.equals(SqlKind.EQUALS)) {
+ equiLPIList.add(jlpi);
+ } else {
+ nonEquiLPIList.add(jlpi);
+ }
+
+ // 2.3 Maintain join keys coming from left vs right (in child &
+ // Join Schema)
+ projsFromLeftPartOfJoinKeys.addAll(jlpi.getProjsFromLeftPartOfJoinKeysInChildSchema());
+ projsFromRightPartOfJoinKeys.addAll(jlpi.getProjsFromRightPartOfJoinKeysInChildSchema());
+ projsFromRightPartOfJoinKeysInJoinSchema.addAll(jlpi
+ .getProjsFromRightPartOfJoinKeysInJoinSchema());
+
+ // 2.4 Update Join Key to JoinLeafPredicateInfo map with keys
+ // from left
+ for (Integer projIndx : jlpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
+ tmpJLPILst = tmpMapOfProjIndxInJoinSchemaToLeafPInfo.get(projIndx);
+ if (tmpJLPILst == null)
+ tmpJLPILst = new ArrayList<JoinLeafPredicateInfo>();
+ tmpJLPILst.add(jlpi);
+ tmpMapOfProjIndxInJoinSchemaToLeafPInfo.put(projIndx, tmpJLPILst);
+ }
+
+ // 2.5 Update Join Key to JoinLeafPredicateInfo map with keys
+ // from right
+ for (Integer projIndx : jlpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
+ projIndxInJoin = projIndx + rightOffSet;
+ tmpJLPILst = tmpMapOfProjIndxInJoinSchemaToLeafPInfo.get(projIndxInJoin);
+ if (tmpJLPILst == null)
+ tmpJLPILst = new ArrayList<JoinLeafPredicateInfo>();
+ tmpJLPILst.add(jlpi);
+ tmpMapOfProjIndxInJoinSchemaToLeafPInfo.put(projIndxInJoin, tmpJLPILst);
+ }
+
+ }
+
+ // 3. Convert the "join key -> List<JoinLeafPredicateInfo>" map values to
+ // ImmutableList
+ for (Entry<Integer, List<JoinLeafPredicateInfo>> e : tmpMapOfProjIndxInJoinSchemaToLeafPInfo
+ .entrySet()) {
+ mapOfProjIndxInJoinSchemaToLeafPInfo.put(e.getKey(), ImmutableList.copyOf(e.getValue()));
+ }
+
+ // 4. Construct JoinPredicateInfo
+ jpi = new JoinPredicateInfo(nonEquiLPIList, equiLPIList, projsFromLeftPartOfJoinKeys,
+ projsFromRightPartOfJoinKeys, projsFromRightPartOfJoinKeysInJoinSchema,
+ mapOfProjIndxInJoinSchemaToLeafPInfo);
+ return jpi;
+ }
+ }
+
+ /**
+ * JoinLeafPredicateInfo represents a leaf predicate in a join condition
+ * (conjunctive element).
+ *
+ * JoinLeafPredicateInfo:
+ * 1. Stores the lists of expressions from the left and right children that
+ * are part of the equi-join keys.
+ * 2. Stores the sets of projection indexes from the left and right children
+ * that are part of the equi-join keys; the indexes are in both the child and
+ * the Join node schema.
+ */
+ public static class JoinLeafPredicateInfo {
+ private final SqlKind comparisonType;
+ private final ImmutableList<RexNode> joinKeyExprsFromLeft;
+ private final ImmutableList<RexNode> joinKeyExprsFromRight;
+ private final ImmutableSet<Integer> projsFromLeftPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> projsFromRightPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> projsFromRightPartOfJoinKeysInJoinSchema;
+
+ public JoinLeafPredicateInfo(SqlKind comparisonType, List<RexNode> joinKeyExprsFromLeft,
+ List<RexNode> joinKeyExprsFromRight, Set<Integer> projsFromLeftPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema) {
+ this.comparisonType = comparisonType;
+ this.joinKeyExprsFromLeft = ImmutableList.copyOf(joinKeyExprsFromLeft);
+ this.joinKeyExprsFromRight = ImmutableList.copyOf(joinKeyExprsFromRight);
+ this.projsFromLeftPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromLeftPartOfJoinKeysInChildSchema);
+ this.projsFromRightPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInChildSchema);
+ this.projsFromRightPartOfJoinKeysInJoinSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInJoinSchema);
+ }
+
+ public List<RexNode> getJoinKeyExprsFromLeft() {
+ return this.joinKeyExprsFromLeft;
+ }
+
+ public List<RexNode> getJoinKeyExprsFromRight() {
+ return this.joinKeyExprsFromRight;
+ }
+
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInChildSchema() {
+ return this.projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ /**
+ * NOTE: Join Schema = left Schema + (right Schema offset by
+ * left.fieldcount). Hence it is OK to return projections from the left in
+ * child schema.
+ */
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInJoinSchema() {
+ return this.projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInChildSchema() {
+ return this.projsFromRightPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInJoinSchema() {
+ return this.projsFromRightPartOfJoinKeysInJoinSchema;
+ }
+
+ private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(HiveJoin j, RexNode pe) {
+ JoinLeafPredicateInfo jlpi = null;
+ List<Integer> filterNulls = new ArrayList<Integer>();
+ List<RexNode> joinKeyExprsFromLeft = new ArrayList<RexNode>();
+ List<RexNode> joinKeyExprsFromRight = new ArrayList<RexNode>();
+ Set<Integer> projsFromLeftPartOfJoinKeysInChildSchema = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeysInChildSchema = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema = new HashSet<Integer>();
+ int rightOffSet = j.getLeft().getRowType().getFieldCount();
+
+ // 1. Split leaf join predicate to expressions from left, right
+ RelOptUtil.splitJoinCondition(j.getSystemFieldList(), j.getLeft(), j.getRight(), pe,
+ joinKeyExprsFromLeft, joinKeyExprsFromRight, filterNulls, null);
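+ // splitJoinCondition separates the two sides of the leaf predicate: for an
+ // equi-leaf such as "l.c1 = r.c2" (illustrative), the left-side expression
+ // is added to joinKeyExprsFromLeft and the right-side one to joinKeyExprsFromRight.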
+
+ // 2. For left expressions, collect child projection indexes used
+ InputReferencedVisitor irvLeft = new InputReferencedVisitor();
+ irvLeft.apply(joinKeyExprsFromLeft);
+ projsFromLeftPartOfJoinKeysInChildSchema.addAll(irvLeft.inputPosReferenced);
+
+ // 3. For right expressions, collect child projection indexes used
+ InputReferencedVisitor irvRight = new InputReferencedVisitor();
+ irvRight.apply(joinKeyExprsFromRight);
+ projsFromRightPartOfJoinKeysInChildSchema.addAll(irvRight.inputPosReferenced);
+
+ // 4. Translate projection indexes from right to join schema, by adding
+ // the offset.
+ for (Integer indx : projsFromRightPartOfJoinKeysInChildSchema) {
+ projsFromRightPartOfJoinKeysInJoinSchema.add(indx + rightOffSet);
+ }
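+ // e.g. if the left child has 5 fields (rightOffSet == 5), right-child
+ // index 2 maps to index 7 in the join schema (illustrative numbers).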
+
+ // 5. Construct JoinLeafPredicateInfo
+ jlpi = new JoinLeafPredicateInfo(pe.getKind(), joinKeyExprsFromLeft, joinKeyExprsFromRight,
+ projsFromLeftPartOfJoinKeysInChildSchema, projsFromRightPartOfJoinKeysInChildSchema,
+ projsFromRightPartOfJoinKeysInJoinSchema);
+
+ return jlpi;
+ }
+ }
+
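+ // A Sort with an empty collation models a pure LIMIT, while a Sort with a
+ // non-empty collation carries an ORDER BY; the two helpers below tell them apart.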
+ public static boolean limitRelNode(RelNode rel) {
+ return (rel instanceof Sort) && ((Sort) rel).getCollation().getFieldCollations().isEmpty();
+ }
+
+ public static boolean orderRelNode(RelNode rel) {
+ return (rel instanceof Sort) && !((Sort) rel).getCollation().getFieldCollations().isEmpty();
+ }
+
+ /**
+ * Get the top-level select (Project) starting from the root. The assumption
+ * here is that the root can only be a Sort or a Project, and that the top
+ * Project is at most 2 levels below the Sort; i.e. Sort(Limit)-Sort(OB)-Select.
+ *
+ * @param rootRel
+ * @return pair of (parent of the top-level Project, the Project itself)
+ */
+ public static Pair<RelNode, RelNode> getTopLevelSelect(final RelNode rootRel) {
+ RelNode tmpRel = rootRel;
+ RelNode parentOforiginalProjRel = rootRel;
+ HiveProject originalProjRel = null;
+
+ while (tmpRel != null) {
+ if (tmpRel instanceof HiveProject) {
+ originalProjRel = (HiveProject) tmpRel;
+ break;
+ }
+ parentOforiginalProjRel = tmpRel;
+ tmpRel = tmpRel.getInput(0);
+ }
+
+ return new Pair<RelNode, RelNode>(parentOforiginalProjRel, originalProjRel);
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
new file mode 100644
index 0000000..837399b
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdUniqueKeys;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveDefaultRelMetadataProvider {
+ private HiveDefaultRelMetadataProvider() {
+ }
+
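+ // Providers are consulted in order, so the Hive-specific stats providers
+ // below take precedence over DefaultRelMetadataProvider, which is the fallback.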
+ public static final RelMetadataProvider INSTANCE = ChainedRelMetadataProvider.of(ImmutableList
+ .of(HiveRelMdDistinctRowCount.SOURCE,
+ HiveRelMdSelectivity.SOURCE,
+ HiveRelMdRowCount.SOURCE,
+ HiveRelMdUniqueKeys.SOURCE,
+ new DefaultRelMetadataProvider()));
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java
new file mode 100644
index 0000000..10fdcc6
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.rel.type.RelDataTypeSystemImpl;
+import org.apache.calcite.sql.type.SqlTypeName;
+
+public class HiveTypeSystemImpl extends RelDataTypeSystemImpl {
+ // TODO: This should come from type system; Currently there is no definition
+ // in type system for this.
+ private static final int MAX_DECIMAL_PRECISION = 38;
+ private static final int MAX_DECIMAL_SCALE = 38;
+ private static final int DEFAULT_DECIMAL_PRECISION = 10;
+ private static final int MAX_VARCHAR_PRECISION = 65535;
+ private static final int MAX_CHAR_PRECISION = 255;
+ private static final int MAX_BINARY_PRECISION = Integer.MAX_VALUE;
+ private static final int MAX_TIMESTAMP_PRECISION = 9;
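+ // A timestamp precision of 9 corresponds to Hive's nanosecond-precision timestamps.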
+
+ @Override
+ public int getMaxScale(SqlTypeName typeName) {
+ switch (typeName) {
+ case DECIMAL:
+ return getMaxNumericScale();
+ case INTERVAL_DAY_TIME:
+ case INTERVAL_YEAR_MONTH:
+ return SqlTypeName.MAX_INTERVAL_FRACTIONAL_SECOND_PRECISION;
+ default:
+ return -1;
+ }
+ }
+
+ @Override
+ public int getDefaultPrecision(SqlTypeName typeName) {
+ switch (typeName) {
+ // Hive will always require user to specify exact sizes for char, varchar;
+ // Binary doesn't need any sizes; Decimal has the default of 10.
+ case CHAR:
+ case VARCHAR:
+ case BINARY:
+ case VARBINARY:
+ case TIME:
+ case TIMESTAMP:
+ return getMaxPrecision(typeName);
+ case DECIMAL:
+ return DEFAULT_DECIMAL_PRECISION;
+ case INTERVAL_DAY_TIME:
+ case INTERVAL_YEAR_MONTH:
+ return SqlTypeName.DEFAULT_INTERVAL_START_PRECISION;
+ default:
+ return -1;
+ }
+ }
+
+ @Override
+ public int getMaxPrecision(SqlTypeName typeName) {
+ switch (typeName) {
+ case DECIMAL:
+ return getMaxNumericPrecision();
+ case VARCHAR:
+ return MAX_VARCHAR_PRECISION;
+ case CHAR:
+ return MAX_CHAR_PRECISION;
+ case VARBINARY:
+ case BINARY:
+ return MAX_BINARY_PRECISION;
+ case TIME:
+ case TIMESTAMP:
+ return MAX_TIMESTAMP_PRECISION;
+ case INTERVAL_DAY_TIME:
+ case INTERVAL_YEAR_MONTH:
+ return SqlTypeName.MAX_INTERVAL_START_PRECISION;
+ default:
+ return -1;
+ }
+ }
+
+ @Override
+ public int getMaxNumericScale() {
+ return MAX_DECIMAL_SCALE;
+ }
+
+ @Override
+ public int getMaxNumericPrecision() {
+ return MAX_DECIMAL_PRECISION;
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
new file mode 100644
index 0000000..6d57a8d
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -0,0 +1,355 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.calcite.plan.RelOptAbstractTable;
+import org.apache.calcite.plan.RelOptSchema;
+import org.apache.calcite.plan.RelOptUtil.InputFinder;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.logical.LogicalTableScan;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMap.Builder;
+
+public class RelOptHiveTable extends RelOptAbstractTable {
+ private final Table hiveTblMetadata;
+ private final String tblAlias;
+ private final ImmutableList<ColumnInfo> hiveNonPartitionCols;
+ private final ImmutableMap<Integer, ColumnInfo> hiveNonPartitionColsMap;
+ private final ImmutableMap<Integer, ColumnInfo> hivePartitionColsMap;
+ private final int noOfProjs;
+ final HiveConf hiveConf;
+
+ private double rowCount = -1;
+ Map<Integer, ColStatistics> hiveColStatsMap = new HashMap<Integer, ColStatistics>();
+ PrunedPartitionList partitionList;
+ Map<String, PrunedPartitionList> partitionCache;
+ AtomicInteger noColsMissingStats;
+
+ protected static final Log LOG = LogFactory
+ .getLog(RelOptHiveTable.class
+ .getName());
+
+ public RelOptHiveTable(RelOptSchema calciteSchema, String qualifiedTblName, String tblAlias,
+ RelDataType rowType, Table hiveTblMetadata, List<ColumnInfo> hiveNonPartitionCols,
+ List<ColumnInfo> hivePartitionCols, HiveConf hconf,
+ Map<String, PrunedPartitionList> partitionCache, AtomicInteger noColsMissingStats) {
+ super(calciteSchema, qualifiedTblName, rowType);
+ this.hiveTblMetadata = hiveTblMetadata;
+ this.tblAlias = tblAlias;
+ this.hiveNonPartitionCols = ImmutableList.copyOf(hiveNonPartitionCols);
+ this.hiveNonPartitionColsMap = getColInfoMap(hiveNonPartitionCols, 0);
+ this.hivePartitionColsMap = getColInfoMap(hivePartitionCols, hiveNonPartitionColsMap.size());
+ this.noOfProjs = hiveNonPartitionCols.size() + hivePartitionCols.size();
+ this.hiveConf = hconf;
+ this.partitionCache = partitionCache;
+ this.noColsMissingStats = noColsMissingStats;
+ }
+
+ private static ImmutableMap<Integer, ColumnInfo> getColInfoMap(List<ColumnInfo> hiveCols,
+ int startIndx) {
+ Builder<Integer, ColumnInfo> bldr = ImmutableMap.<Integer, ColumnInfo> builder();
+
+ int indx = startIndx;
+ for (ColumnInfo ci : hiveCols) {
+ bldr.put(indx, ci);
+ indx++;
+ }
+
+ return bldr.build();
+ }
+
+ @Override
+ public boolean isKey(ImmutableBitSet arg0) {
+ return false;
+ }
+
+ @Override
+ public RelNode toRel(ToRelContext context) {
+ return new LogicalTableScan(context.getCluster(), this);
+ }
+
+ @Override
+ public <T> T unwrap(Class<T> arg0) {
+ return arg0.isInstance(this) ? arg0.cast(this) : null;
+ }
+
+ @Override
+ public double getRowCount() {
+ if (rowCount == -1) {
+ if (null == partitionList) {
+ // we are here for either an unpartitioned table or a partitioned table with no predicates
+ computePartitionList(hiveConf, null);
+ }
+ if (hiveTblMetadata.isPartitioned()) {
+ List<Long> rowCounts = StatsUtils.getBasicStatForPartitions(
+ hiveTblMetadata, partitionList.getNotDeniedPartns(),
+ StatsSetupConst.ROW_COUNT);
+ rowCount = StatsUtils.getSumIgnoreNegatives(rowCounts);
+
+ } else {
+ rowCount = StatsUtils.getNumRows(hiveTblMetadata);
+ }
+ }
+
+ if (rowCount == -1)
+ noColsMissingStats.getAndIncrement();
+
+ return rowCount;
+ }
+
+ public Table getHiveTableMD() {
+ return hiveTblMetadata;
+ }
+
+ public String getTableAlias() {
+ // NOTE: Calcite considers tables to be equal if their names are the same. Hence
+ // we need to provide Calcite the fully qualified table name (dbname.tblname)
+ // and not the user-provided aliases.
+ // However, in Hive a DB name can not appear in the select list; in the case
+ // of a join where table names differ only in DB name, Hive requires the user
+ // to introduce explicit aliases for the tables.
+ if (tblAlias == null)
+ return hiveTblMetadata.getTableName();
+ else
+ return tblAlias;
+ }
+
+ private String getColNamesForLogging(Set<String> colLst) {
+ StringBuffer sb = new StringBuffer();
+ boolean firstEntry = true;
+ for (String colName : colLst) {
+ if (firstEntry) {
+ sb.append(colName);
+ firstEntry = false;
+ } else {
+ sb.append(", " + colName);
+ }
+ }
+ return sb.toString();
+ }
+
+ public void computePartitionList(HiveConf conf, RexNode pruneNode) {
+
+ try {
+ if (!hiveTblMetadata.isPartitioned() || pruneNode == null || InputFinder.bits(pruneNode).length() == 0 ) {
+ // there is no predicate on a partitioning column; we need all partitions in this case.
+ partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf, getName(), partitionCache);
+ return;
+ }
+
+ // We have valid pruning expressions, only retrieve qualifying partitions
+ ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true));
+
+ partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(), partitionCache);
+ } catch (HiveException he) {
+ throw new RuntimeException(he);
+ }
+ }
+
+ private void updateColStats(Set<Integer> projIndxLst) {
+ List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
+ List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
+ List<String> partColNamesThatRqrStats = new ArrayList<String>();
+ List<Integer> partColIndxsThatRqrStats = new ArrayList<Integer>();
+ Set<String> colNamesFailedStats = new HashSet<String>();
+
+ // 1. Separate required columns to Non Partition and Partition Cols
+ ColumnInfo tmp;
+ for (Integer pi : projIndxLst) {
+ if (hiveColStatsMap.get(pi) == null) {
+ if ((tmp = hiveNonPartitionColsMap.get(pi)) != null) {
+ nonPartColNamesThatRqrStats.add(tmp.getInternalName());
+ nonPartColIndxsThatRqrStats.add(pi);
+ } else if ((tmp = hivePartitionColsMap.get(pi)) != null) {
+ partColNamesThatRqrStats.add(tmp.getInternalName());
+ partColIndxsThatRqrStats.add(pi);
+ } else {
+ noColsMissingStats.getAndIncrement();
+ String logMsg = "Unable to find Column Index: " + pi + ", in "
+ + hiveTblMetadata.getCompleteName();
+ LOG.error(logMsg);
+ throw new RuntimeException(logMsg);
+ }
+ }
+ }
+
+ if (null == partitionList) {
+ // We could be here either because it is an unpartitioned table or because
+ // there are no pruning predicates on a partitioned table.
+ computePartitionList(hiveConf, null);
+ }
+
+ // 2. Obtain Col Stats for Non Partition Cols
+ if (nonPartColNamesThatRqrStats.size() > 0) {
+ List<ColStatistics> hiveColStats;
+
+ if (!hiveTblMetadata.isPartitioned()) {
+ // 2.1 Handle the case for unpartitioned table.
+ hiveColStats = StatsUtils.getTableColumnStats(hiveTblMetadata, hiveNonPartitionCols,
+ nonPartColNamesThatRqrStats);
+
+ // 2.1.1 Record Column Names that we needed stats for but couldn't
+ if (hiveColStats == null) {
+ colNamesFailedStats.addAll(nonPartColNamesThatRqrStats);
+ } else if (hiveColStats.size() != nonPartColNamesThatRqrStats.size()) {
+ Set<String> setOfFailedCols = new HashSet<String>(nonPartColNamesThatRqrStats);
+
+ Set<String> setOfObtainedColStats = new HashSet<String>();
+ for (ColStatistics cs : hiveColStats) {
+ setOfObtainedColStats.add(cs.getColumnName());
+ }
+ setOfFailedCols.removeAll(setOfObtainedColStats);
+
+ colNamesFailedStats.addAll(setOfFailedCols);
+ }
+ } else {
+ // 2.2 Obtain col stats for partitioned table.
+ try {
+ if (partitionList.getNotDeniedPartns().isEmpty()) {
+ // no need to make a metastore call
+ rowCount = 0;
+ hiveColStats = new ArrayList<ColStatistics>();
+ for (String c : nonPartColNamesThatRqrStats) {
+ // add empty stats object for each column
+ hiveColStats.add(new ColStatistics(hiveTblMetadata.getTableName(), c, null));
+ }
+ colNamesFailedStats.clear();
+ } else {
+ Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList,
+ hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats,
+ nonPartColNamesThatRqrStats, true, true);
+ rowCount = stats.getNumRows();
+ hiveColStats = new ArrayList<ColStatistics>();
+ for (String c : nonPartColNamesThatRqrStats) {
+ ColStatistics cs = stats.getColumnStatisticsFromColName(c);
+ if (cs != null) {
+ hiveColStats.add(cs);
+ } else {
+ colNamesFailedStats.add(c);
+ }
+ }
+ }
+ } catch (HiveException e) {
+ String logMsg = "Collecting stats failed.";
+ LOG.error(logMsg, e);
+ throw new RuntimeException(logMsg, e);
+ }
+ }
+
+ if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) {
+ for (int i = 0; i < hiveColStats.size(); i++) {
+ hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), hiveColStats.get(i));
+ }
+ }
+ }
+
+ // 3. Obtain Stats for Partition Cols
+ if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) {
+ ColStatistics cStats = null;
+ for (int i = 0; i < partColNamesThatRqrStats.size(); i++) {
+ cStats = new ColStatistics(hiveTblMetadata.getTableName(),
+ partColNamesThatRqrStats.get(i), hivePartitionColsMap.get(
+ partColIndxsThatRqrStats.get(i)).getTypeName());
+ cStats.setCountDistint(getDistinctCount(partitionList.getPartitions(),partColNamesThatRqrStats.get(i)));
+ hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats);
+ }
+ }
+
+ // 4. Fail if we could not get stats for all required columns
+ if (!colNamesFailedStats.isEmpty()) {
+ String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", Columns: "
+ + getColNamesForLogging(colNamesFailedStats);
+ LOG.error(logMsg);
+ noColsMissingStats.getAndAdd(colNamesFailedStats.size());
+ throw new RuntimeException(logMsg);
+ }
+ }
+
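+ // The NDV of a partition column is simply the number of distinct values it
+ // takes across the selected partition specs.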
+ private int getDistinctCount(Set<Partition> partitions, String partColName) {
+ Set<String> distinctVals = new HashSet<String>(partitions.size());
+ for (Partition partition : partitions) {
+ distinctVals.add(partition.getSpec().get(partColName));
+ }
+ return distinctVals.size();
+ }
+
+ public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
+ ImmutableList.Builder<ColStatistics> colStatsBldr = ImmutableList.<ColStatistics> builder();
+
+ if (projIndxLst != null) {
+ updateColStats(new HashSet<Integer>(projIndxLst));
+ for (Integer i : projIndxLst) {
+ colStatsBldr.add(hiveColStatsMap.get(i));
+ }
+ } else {
+ List<Integer> pILst = new ArrayList<Integer>();
+ for (Integer i = 0; i < noOfProjs; i++) {
+ pILst.add(i);
+ }
+ updateColStats(new HashSet<Integer>(pILst));
+ for (Integer pi : pILst) {
+ colStatsBldr.add(hiveColStatsMap.get(pi));
+ }
+ }
+
+ return colStatsBldr.build();
+ }
+
+ /*
+ * Used to check whether a set of columns consists of partition columns only;
+ * returns true only if all columns in the BitSet are partition columns.
+ */
+ public boolean containsPartitionColumnsOnly(ImmutableBitSet cols) {
+
+ for (int i = cols.nextSetBit(0); i >= 0; i = cols.nextSetBit(i + 1)) {
+ if (!hivePartitionColsMap.containsKey(i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/TraitsUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/TraitsUtil.java
new file mode 100644
index 0000000..f182846
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/TraitsUtil.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+
+public class TraitsUtil {
+ public static RelTraitSet getSortTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ RelCollation collation) {
+ return traitSet.plus(collation);
+ }
+
+ public static RelTraitSet getDefaultTraitSet(RelOptCluster cluster) {
+ return cluster.traitSetOf(HiveRelNode.CONVENTION, RelCollationImpl.EMPTY);
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java
new file mode 100644
index 0000000..71b6680
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java
@@ -0,0 +1,212 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.cost;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptCostFactory;
+import org.apache.calcite.plan.RelOptUtil;
+
+// TODO: This should inherit from VolcanoCost and should just override isLE method.
+public class HiveCost implements RelOptCost {
+ // ~ Static fields/initializers ---------------------------------------------
+
+ public static final HiveCost INFINITY = new HiveCost(Double.POSITIVE_INFINITY,
+ Double.POSITIVE_INFINITY,
+ Double.POSITIVE_INFINITY) {
+ @Override
+ public String toString() {
+ return "{inf}";
+ }
+ };
+
+ public static final HiveCost HUGE = new HiveCost(Double.MAX_VALUE, Double.MAX_VALUE,
+ Double.MAX_VALUE) {
+ @Override
+ public String toString() {
+ return "{huge}";
+ }
+ };
+
+ public static final HiveCost ZERO = new HiveCost(0.0, 0.0, 0.0) {
+ @Override
+ public String toString() {
+ return "{0}";
+ }
+ };
+
+ public static final HiveCost TINY = new HiveCost(1.0, 1.0, 0.0) {
+ @Override
+ public String toString() {
+ return "{tiny}";
+ }
+ };
+
+ public static final RelOptCostFactory FACTORY = new Factory();
+
+ // ~ Instance fields --------------------------------------------------------
+
+ final double cpu;
+ final double io;
+ final double rowCount;
+
+ // ~ Constructors -----------------------------------------------------------
+
+ HiveCost(double rowCount, double cpu, double io) {
+ assert rowCount >= 0d;
+ assert cpu >= 0d;
+ assert io >= 0d;
+ this.rowCount = rowCount;
+ this.cpu = cpu;
+ this.io = io;
+ }
+
+ // ~ Methods ----------------------------------------------------------------
+
+ public double getCpu() {
+ return cpu;
+ }
+
+ public boolean isInfinite() {
+ return (this == INFINITY) || (this.rowCount == Double.POSITIVE_INFINITY)
+ || (this.cpu == Double.POSITIVE_INFINITY) || (this.io == Double.POSITIVE_INFINITY);
+ }
+
+ public double getIo() {
+ return io;
+ }
+
+ // TODO: If two costs are equal, could we do any better than comparing
+ // cardinality (maybe some other heuristic to break the tie)?
+ public boolean isLe(RelOptCost other) {
+ return this == other || this.rowCount <= other.getRows();
+ /*
+ * if (((this.dCpu + this.dIo) < (other.getCpu() + other.getIo())) ||
+ * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()) && this.dRows
+ * <= other.getRows())) { return true; } else { return false; }
+ */
+ }
+
+ public boolean isLt(RelOptCost other) {
+ return this.rowCount < other.getRows();
+ /*
+ * return isLe(other) && !equals(other);
+ */
+ }
+
+ public double getRows() {
+ return rowCount;
+ }
+
+ public boolean equals(RelOptCost other) {
+ return (this == other) || ((this.rowCount) == (other.getRows()));
+
+ /*
+ * //TODO: should we consider cardinality as well? return (this == other) ||
+ * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()));
+ */
+ }
+
+ public boolean isEqWithEpsilon(RelOptCost other) {
+ return (this == other) || (Math.abs((this.rowCount) - (other.getRows())) < RelOptUtil.EPSILON);
+ // Turn this on once we do the Algorithm selection in CBO
+ /*
+ * return (this == other) || (Math.abs((this.dCpu + this.dIo) -
+ * (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON);
+ */
+ }
+
+ public RelOptCost minus(RelOptCost other) {
+ if (this == INFINITY) {
+ return this;
+ }
+
+ return new HiveCost(this.rowCount - other.getRows(), this.cpu - other.getCpu(), this.io
+ - other.getIo());
+ }
+
+ public RelOptCost multiplyBy(double factor) {
+ if (this == INFINITY) {
+ return this;
+ }
+ return new HiveCost(rowCount * factor, cpu * factor, io * factor);
+ }
+
+ public double divideBy(RelOptCost cost) {
+ // Compute the geometric average of the ratios of all of the factors
+ // which are non-zero and finite.
+ double d = 1;
+ double n = 0;
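+ // e.g. (illustrative): {4 rows, 2 cpu, 0 io} / {2 rows, 1 cpu, 0 io}
+ // -> two non-zero factors with ratios 2 and 2 -> (2 * 2)^(1/2) = 2.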
+ if ((this.rowCount != 0) && !Double.isInfinite(this.rowCount) && (cost.getRows() != 0)
+ && !Double.isInfinite(cost.getRows())) {
+ d *= this.rowCount / cost.getRows();
+ ++n;
+ }
+ if ((this.cpu != 0) && !Double.isInfinite(this.cpu) && (cost.getCpu() != 0)
+ && !Double.isInfinite(cost.getCpu())) {
+ d *= this.cpu / cost.getCpu();
+ ++n;
+ }
+ if ((this.io != 0) && !Double.isInfinite(this.io) && (cost.getIo() != 0)
+ && !Double.isInfinite(cost.getIo())) {
+ d *= this.io / cost.getIo();
+ ++n;
+ }
+ if (n == 0) {
+ return 1.0;
+ }
+ return Math.pow(d, 1 / n);
+ }
+
+ public RelOptCost plus(RelOptCost other) {
+ if ((this == INFINITY) || (other.isInfinite())) {
+ return INFINITY;
+ }
+ return new HiveCost(this.rowCount + other.getRows(), this.cpu + other.getCpu(), this.io
+ + other.getIo());
+ }
+
+ @Override
+ public String toString() {
+ return "{" + rowCount + " rows, " + cpu + " cpu, " + io + " io}";
+ }
+
+ private static class Factory implements RelOptCostFactory {
+ private Factory() {
+ }
+
+ public RelOptCost makeCost(double rowCount, double cpu, double io) {
+ return new HiveCost(rowCount, cpu, io);
+ }
+
+ public RelOptCost makeHugeCost() {
+ return HUGE;
+ }
+
+ public HiveCost makeInfiniteCost() {
+ return INFINITY;
+ }
+
+ public HiveCost makeTinyCost() {
+ return TINY;
+ }
+
+ public HiveCost makeZeroCost() {
+ return ZERO;
+ }
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java
new file mode 100644
index 0000000..c7e9217
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.cost;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+// Use this once we have Join Algorithm selection
+public class HiveCostUtil {
+ private static final double cpuCostInNanoSec = 1.0;
+ private static final double netCostInNanoSec = 150 * cpuCostInNanoSec;
+ private static final double localFSWriteCostInNanoSec = 4 * netCostInNanoSec;
+ private static final double localFSReadCostInNanoSec = 4 * netCostInNanoSec;
+ private static final double hDFSWriteCostInNanoSec = 10 * localFSWriteCostInNanoSec;
+ @SuppressWarnings("unused")
+ private static final double hDFSReadCostInNanoSec = 1.5 * localFSReadCostInNanoSec;
+
+ public static RelOptCost computCardinalityBasedCost(HiveRelNode hr) {
+ return new HiveCost(hr.getRows(), 0, 0);
+ }
+
+ public static HiveCost computeCost(HiveTableScan t) {
+ double cardinality = t.getRows();
+ return new HiveCost(cardinality, 0, hDFSWriteCostInNanoSec * cardinality * 0);
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
new file mode 100644
index 0000000..ebcd4f3
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.cost;
+
+import org.apache.calcite.plan.ConventionTraitDef;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.volcano.VolcanoPlanner;
+import org.apache.calcite.rel.RelCollationTraitDef;
+
+/**
+ * Refinement of {@link org.apache.calcite.plan.volcano.VolcanoPlanner} for Hive.
+ *
+ * It uses {@link org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost} as
+ * its cost model.
+ */
+public class HiveVolcanoPlanner extends VolcanoPlanner {
+ private static final boolean ENABLE_COLLATION_TRAIT = true;
+
+ /** Creates a HiveVolcanoPlanner. */
+ public HiveVolcanoPlanner() {
+ super(HiveCost.FACTORY, null);
+ }
+
+ public static RelOptPlanner createPlanner() {
+ final VolcanoPlanner planner = new HiveVolcanoPlanner();
+ planner.addRelTraitDef(ConventionTraitDef.INSTANCE);
+ if (ENABLE_COLLATION_TRAIT) {
+ planner.addRelTraitDef(RelCollationTraitDef.INSTANCE);
+ }
+ return planner;
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
new file mode 100644
index 0000000..21ddc99
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.InvalidRelException;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.RelFactories.AggregateFactory;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveAggregate extends Aggregate implements HiveRelNode {
+
+ public static final HiveAggRelFactory HIVE_AGGR_REL_FACTORY = new HiveAggRelFactory();
+
+ public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ boolean indicator, ImmutableBitSet groupSet, List groupSets,
+ List aggCalls) throws InvalidRelException {
+ super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, indicator, groupSet,
+ groupSets, aggCalls);
+ }
+
+ @Override
+ public Aggregate copy(RelTraitSet traitSet, RelNode input,
+ boolean indicator, ImmutableBitSet groupSet,
+ List groupSets, List aggCalls) {
+ try {
+ return new HiveAggregate(getCluster(), traitSet, input, indicator, groupSet,
+ groupSets, aggCalls);
+ } catch (InvalidRelException e) {
+ // Semantic error not possible. Must be a bug. Convert to
+ // internal error.
+ throw new AssertionError(e);
+ }
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
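+ // The aggregate's row count is estimated as the distinct row count over its
+ // group set (the TRUE literal stands for "no filtering predicate").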
+ @Override
+ public double getRows() {
+ return RelMetadataQuery.getDistinctRowCount(this, groupSet, getCluster().getRexBuilder()
+ .makeLiteral(true));
+ }
+
+ private static class HiveAggRelFactory implements AggregateFactory {
+
+ @Override
+ public RelNode createAggregate(RelNode child, boolean indicator,
+ ImmutableBitSet groupSet, ImmutableList groupSets,
+ List aggCalls) {
+ try {
+ return new HiveAggregate(child.getCluster(), child.getTraitSet(), child, indicator,
+ groupSet, groupSets, aggCalls);
+ } catch (InvalidRelException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
new file mode 100644
index 0000000..3e45a3f
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.RelFactories.FilterFactory;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+
+public class HiveFilter extends Filter implements HiveRelNode {
+
+ public static final FilterFactory DEFAULT_FILTER_FACTORY = new HiveFilterFactoryImpl();
+
+ public HiveFilter(RelOptCluster cluster, RelTraitSet traits, RelNode child, RexNode condition) {
+ super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition);
+ }
+
+ @Override
+ public Filter copy(RelTraitSet traitSet, RelNode input, RexNode condition) {
+ assert traitSet.containsIfApplicable(HiveRelNode.CONVENTION);
+ return new HiveFilter(getCluster(), traitSet, input, getCondition());
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ /**
+ * Implementation of {@link FilterFactory} that returns
+ * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter}
+ * .
+ */
+ private static class HiveFilterFactoryImpl implements FilterFactory {
+ @Override
+ public RelNode createFilter(RelNode child, RexNode condition) {
+ RelOptCluster cluster = child.getCluster();
+ HiveFilter filter = new HiveFilter(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition);
+ return filter;
+ }
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
new file mode 100644
index 0000000..724135b
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.InvalidRelException;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories.JoinFactory;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+
+//TODO: Should we convert MultiJoin to be a child of HiveJoin?
+public class HiveJoin extends Join implements HiveRelNode {
+ // NOTE: COMMON_JOIN & SMB_JOIN are Sort Merge Joins (in the case of
+ // COMMON_JOIN each parallel computation handles multiple splits, whereas in
+ // the case of SMB each parallel computation handles one bucket). MAP_JOIN
+ // and BUCKET_JOIN are hash joins, where MAP_JOIN keeps the whole data set of
+ // the non-streaming tables in memory, whereas BUCKET_JOIN keeps only the
+ // buckets currently being joined in memory.
+ public enum JoinAlgorithm {
+ NONE, COMMON_JOIN, MAP_JOIN, BUCKET_JOIN, SMB_JOIN
+ }
+
+ public enum MapJoinStreamingRelation {
+ NONE, LEFT_RELATION, RIGHT_RELATION
+ }
+
+ public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl();
+
+ private final boolean leftSemiJoin;
+ private final JoinAlgorithm joinAlgorithm;
+ //This will be used once we do Join Algorithm selection
+ @SuppressWarnings("unused")
+ private final MapJoinStreamingRelation mapJoinStreamingSide = MapJoinStreamingRelation.NONE;
+
+ public static HiveJoin getJoin(RelOptCluster cluster, RelNode left, RelNode right,
+ RexNode condition, JoinRelType joinType, boolean leftSemiJoin) {
+ try {
+ Set<String> variablesStopped = Collections.emptySet();
+ return new HiveJoin(cluster, null, left, right, condition, joinType, variablesStopped,
+ JoinAlgorithm.NONE, null, leftSemiJoin);
+ } catch (InvalidRelException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ protected HiveJoin(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right,
+ RexNode condition, JoinRelType joinType, Set<String> variablesStopped,
+ JoinAlgorithm joinAlgo, MapJoinStreamingRelation streamingSideForMapJoin, boolean leftSemiJoin)
+ throws InvalidRelException {
+ super(cluster, TraitsUtil.getDefaultTraitSet(cluster), left, right, condition, joinType,
+ variablesStopped);
+ this.joinAlgorithm = joinAlgo;
+ this.leftSemiJoin = leftSemiJoin;
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public final HiveJoin copy(RelTraitSet traitSet, RexNode conditionExpr, RelNode left,
+ RelNode right, JoinRelType joinType, boolean semiJoinDone) {
+ try {
+ Set<String> variablesStopped = Collections.emptySet();
+ return new HiveJoin(getCluster(), traitSet, left, right, conditionExpr, joinType,
+ variablesStopped, JoinAlgorithm.NONE, null, leftSemiJoin);
+ } catch (InvalidRelException e) {
+ // Semantic error not possible. Must be a bug. Convert to
+ // internal error.
+ throw new AssertionError(e);
+ }
+ }
+
+ public JoinAlgorithm getJoinAlgorithm() {
+ return joinAlgorithm;
+ }
+
+ public boolean isLeftSemiJoin() {
+ return leftSemiJoin;
+ }
+
+ /**
+ * Model cost of join as size of Inputs.
+ */
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ double leftRCount = RelMetadataQuery.getRowCount(getLeft());
+ double rightRCount = RelMetadataQuery.getRowCount(getRight());
+ return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0);
+ }
+
+ /**
+ * @return row type representing only the left join input when this is a left semi-join
+ */
+ @Override
+ public RelDataType deriveRowType() {
+ if (leftSemiJoin) {
+ return deriveJoinRowType(left.getRowType(), null, JoinRelType.INNER,
+ getCluster().getTypeFactory(), null,
+ Collections.<RelDataTypeField> emptyList());
+ }
+ return super.deriveRowType();
+ }
+
+ private static class HiveJoinFactoryImpl implements JoinFactory {
+ /**
+ * Creates a join.
+ *
+ * @param left
+ * Left input
+ * @param right
+ * Right input
+ * @param condition
+ * Join condition
+ * @param joinType
+ * Join type
+ * @param variablesStopped
+ * Set of names of variables which are set by the LHS and used by
+ * the RHS and are not available to nodes above this JoinRel in the
+ * tree
+ * @param semiJoinDone
+ * Whether this join has been translated to a semi-join
+ */
+ @Override
+ public RelNode createJoin(RelNode left, RelNode right, RexNode condition, JoinRelType joinType,
+ Set<String> variablesStopped, boolean semiJoinDone) {
+ return getJoin(left.getCluster(), left, right, condition, joinType, false);
+ }
+ }
+}
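
computeSelfCost above models a join's cost purely as the sum of its input cardinalities (CPU and IO are zero). A minimal sketch under assumed row counts, showing why this metric favors joining small inputs first:

    public class JoinCostSketch {
      // Mirrors HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0).
      static double joinCost(double leftRows, double rightRows) {
        return leftRows + rightRows;
      }

      public static void main(String[] args) {
        double fact = 1000000d, dim1 = 1000d, dim2 = 100d; // assumed cardinalities
        System.out.println(joinCost(dim1, dim2)); // 1100.0: cheap dimension-side join
        System.out.println(joinCost(fact, dim1)); // 1001000.0: fact-side join
      }
    }
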
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java
new file mode 100644
index 0000000..5fc64f3
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.SingleRel;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+
+public class HiveLimit extends SingleRel implements HiveRelNode {
+ private final RexNode offset;
+ private final RexNode fetch;
+
+ HiveLimit(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, RexNode offset,
+ RexNode fetch) {
+ super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child);
+ this.offset = offset;
+ this.fetch = fetch;
+ assert getConvention() == HiveRelNode.CONVENTION;
+ assert getConvention() == child.getConvention();
+ }
+
+ @Override
+ public HiveLimit copy(RelTraitSet traitSet, List<RelNode> newInputs) {
+ return new HiveLimit(getCluster(), traitSet, sole(newInputs), offset, fetch);
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
new file mode 100644
index 0000000..6c215c9
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.RelFactories.ProjectFactory;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.util.Util;
+import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.MappingType;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveProject extends Project implements HiveRelNode {
+
+ public static final ProjectFactory DEFAULT_PROJECT_FACTORY = new HiveProjectFactoryImpl();
+
+ private final List<Integer> virtualCols;
+
+ /**
+ * Creates a HiveProject.
+ *
+ * @param cluster
+ * Cluster this relational expression belongs to
+ * @param child
+ * input relational expression
+ * @param exps
+ * List of expressions for the input columns
+ * @param rowType
+ * output row type
+ * @param flags
+ * values as in {@link Project.Flags}
+ */
+ public HiveProject(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ List<? extends RexNode> exps, RelDataType rowType, int flags) {
+ super(cluster, traitSet, child, exps, rowType, flags);
+ virtualCols = ImmutableList.copyOf(HiveCalciteUtil.getVirtualCols(exps));
+ }
+
+ /**
+ * Creates a HiveProject with no sort keys.
+ *
+ * @param child
+ * input relational expression
+ * @param exps
+ * set of expressions for the input columns
+ * @param fieldNames
+ * aliases of the expressions
+ */
+ public static HiveProject create(RelNode child, List<? extends RexNode> exps,
+ List<String> fieldNames) throws CalciteSemanticException {
+ RelOptCluster cluster = child.getCluster();
+
+ // 1 Ensure columnNames are unique - CALCITE-411
+ if (fieldNames != null && !Util.isDistinct(fieldNames)) {
+ String msg = "Select list contains multiple expressions with the same name." + fieldNames;
+ throw new CalciteSemanticException(msg);
+ }
+ RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), exps, fieldNames);
+ return create(cluster, child, exps, rowType, Collections.<RelCollation> emptyList());
+ }
+
+ /**
+ * Creates a HiveProject.
+ */
+ public static HiveProject create(RelOptCluster cluster, RelNode child, List<? extends RexNode> exps,
+ RelDataType rowType, final List<RelCollation> collationList) {
+ RelTraitSet traitSet = TraitsUtil.getDefaultTraitSet(cluster);
+ return new HiveProject(cluster, traitSet, child, exps, rowType, Flags.BOXED);
+ }
+
+ /**
+ * Creates a HiveProject.
+ */
+ public static HiveProject create(RelOptCluster cluster, RelNode child, List<? extends RexNode> exps,
+ RelDataType rowType, RelTraitSet traitSet, final List<RelCollation> collationList) {
+ return new HiveProject(cluster, traitSet, child, exps, rowType, Flags.BOXED);
+ }
+
+ /**
+ * Creates a relational expression which projects the output fields of a
+ * relational expression according to a partial mapping.
+ *
+ *
+ * <p>A partial mapping is weaker than a permutation: every target has one
+ * source, but a source may have 0, 1 or more than one targets. Usually the
+ * result will have fewer fields than the source, unless some source fields
+ * are projected multiple times.
+ *
+ * <p>This method could optimize the result as {@link #permute} does, but does
+ * not at present.
+ *
+ * @param rel
+ * Relational expression
+ * @param mapping
+ * Mapping from source fields to target fields. The mapping type must
+ * obey the constraints {@link MappingType#isMandatorySource()} and
+ * {@link MappingType#isSingleSource()}, as does
+ * {@link MappingType#INVERSE_FUNCTION}.
+ * @param fieldNames
+ * Field names; if null, or if a particular entry is null, the name
+ * of the permuted field is used
+ * @return relational expression which projects a subset of the input fields
+ * @throws CalciteSemanticException
+ */
+ public static RelNode projectMapping(RelNode rel, Mapping mapping,
+ List<String> fieldNames) throws CalciteSemanticException {
+ assert mapping.getMappingType().isSingleSource();
+ assert mapping.getMappingType().isMandatorySource();
+
+ if (mapping.isIdentity()) {
+ return rel;
+ }
+
+ final List<String> outputNameList = new ArrayList<String>();
+ final List<RexNode> outputProjList = new ArrayList<RexNode>();
+ final List<RelDataTypeField> fields = rel.getRowType().getFieldList();
+ final RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
+
+ for (int i = 0; i < mapping.getTargetCount(); i++) {
+ int source = mapping.getSource(i);
+ final RelDataTypeField sourceField = fields.get(source);
+ outputNameList
+ .add(((fieldNames == null) || (fieldNames.size() <= i) || (fieldNames.get(i) == null)) ? sourceField
+ .getName() : fieldNames.get(i));
+ outputProjList.add(rexBuilder.makeInputRef(rel, source));
+ }
+
+ return create(rel, outputProjList, outputNameList);
+ }
+
+ @Override
+ public Project copy(RelTraitSet traitSet, RelNode input, List<RexNode> exps,
+ RelDataType rowType) {
+ assert traitSet.containsIfApplicable(HiveRelNode.CONVENTION);
+ return new HiveProject(getCluster(), traitSet, input, exps, rowType, getFlags());
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ public List<Integer> getVirtualCols() {
+ return virtualCols;
+ }
+
+ /**
+ * Implementation of {@link ProjectFactory} that returns
+ * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject}
+ * .
+ */
+ private static class HiveProjectFactoryImpl implements ProjectFactory {
+
+ @Override
+ public RelNode createProject(RelNode child,
+ List<? extends RexNode> childExprs, List<String> fieldNames) {
+ RelOptCluster cluster = child.getCluster();
+ RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), childExprs, fieldNames);
+ RelNode project = HiveProject.create(cluster, child,
+ childExprs, rowType,
+ child.getTraitSet(), Collections.<RelCollation> emptyList());
+
+ return project;
+ }
+ }
+}
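
projectMapping above materializes a partial mapping as one input reference per target field. A minimal sketch of the semantics, with plain arrays standing in for RelDataTypeField and RexInputRef (names illustrative, not part of the patch):

    public class ProjectMappingSketch {
      public static void main(String[] args) {
        String[] sourceFields = {"a", "b", "c"};
        int[] targetToSource = {2, 0, 0}; // every target has exactly one source;
                                          // source "b" feeds no target, "a" feeds two
        for (int target = 0; target < targetToSource.length; target++) {
          // projectMapping would emit rexBuilder.makeInputRef(rel, source) here,
          // named after the source field unless fieldNames supplies an alias.
          System.out.println("output $" + target + " = "
              + sourceFields[targetToSource[target]]);
        }
      }
    }
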
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java
new file mode 100644
index 0000000..30acfe2
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.plan.Convention;
+import org.apache.calcite.rel.RelNode;
+
+public interface HiveRelNode extends RelNode {
+ void implement(Implementor implementor);
+
+ /** Calling convention for relational operations that occur in Hive. */
+ final Convention CONVENTION = new Convention.Impl("HIVE", HiveRelNode.class);
+
+ class Implementor {
+
+ public void visitChild(int ordinal, RelNode input) {
+ assert ordinal == 0;
+ ((HiveRelNode) input).implement(this);
+ }
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
new file mode 100644
index 0000000..18d2838
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.Map;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+
+import com.google.common.collect.ImmutableMap;
+
+public class HiveSort extends Sort implements HiveRelNode {
+
+ public static final HiveSortRelFactory HIVE_SORT_REL_FACTORY = new HiveSortRelFactory();
+
+ // NOTE: this works around Hive/Calcite limitations w.r.t. ORDER BY (OB):
+ // 1. Calcite cannot accept expressions in OB; instead they must be exposed
+ // as virtual columns (VC) in the input Select.
+ // 2. Hive cannot preserve ordering across select boundaries.
+ // 3. This map is used by the outermost OB to recover the OB expressions
+ // corresponding to the VCs of the input select.
+ // 4. It is read by ASTConverter after Calcite planning is done.
+ private ImmutableMap<Integer, RexNode> mapOfInputRefToRexCall;
+
+ public HiveSort(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ RelCollation collation, RexNode offset, RexNode fetch) {
+ super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation,
+ offset, fetch);
+ }
+
+ @Override
+ public HiveSort copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation,
+ RexNode offset, RexNode fetch) {
+ // TODO: can we blindly copy sort trait? What if inputs changed and we
+ // are now sorting by different cols
+ RelCollation canonizedCollation = traitSet.canonize(newCollation);
+ return new HiveSort(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch);
+ }
+
+ public RexNode getFetchExpr() {
+ return fetch;
+ }
+
+ public void setInputRefToCallMap(ImmutableMap<Integer, RexNode> refToCall) {
+ this.mapOfInputRefToRexCall = refToCall;
+ }
+
+ public Map<Integer, RexNode> getInputRefToCallMap() {
+ return this.mapOfInputRefToRexCall;
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ private static class HiveSortRelFactory implements RelFactories.SortFactory {
+
+ @Override
+ public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation,
+ RexNode offset, RexNode fetch) {
+ return new HiveSort(child.getCluster(), traits, child, collation, offset, fetch);
+ }
+ }
+}
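
The input-ref-to-call map above exists because Calcite only sorts on input columns, so an expression such as ORDER BY a + b must first become a virtual column of the input Select. A minimal sketch of the bookkeeping, with String standing in for RexNode and an assumed column index:

    import com.google.common.collect.ImmutableMap;

    public class SortVcMapSketch {
      public static void main(String[] args) {
        int vcIndex = 2; // assumed position of the virtual column in the input Select
        // HiveSort would hold {2 -> the RexCall for "a + b"}; ASTConverter later
        // uses this to unparse the sort key as "a + b", not the synthetic column.
        ImmutableMap<Integer, String> inputRefToCall = ImmutableMap.of(vcIndex, "a + b");
        System.out.println("ORDER BY $" + vcIndex + " unparses as "
            + inputRefToCall.get(vcIndex));
      }
    }
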
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
new file mode 100644
index 0000000..53021ea
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+
+/**
+ * Relational expression representing a scan of a Hive table.
+ */
+public class HiveTableScan extends TableScan implements HiveRelNode {
+
+ /**
+ * Creates a HiveTableScan.
+ *
+ * @param cluster
+ * Cluster
+ * @param traitSet
+ * Traits
+ * @param table
+ * Hive table
+ * @param rowtype
+ * Row type
+ */
+ public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table,
+ RelDataType rowtype) {
+ super(cluster, TraitsUtil.getDefaultTraitSet(cluster), table);
+ assert getConvention() == HiveRelNode.CONVENTION;
+ }
+
+ @Override
+ public RelNode copy(RelTraitSet traitSet, List<RelNode> inputs) {
+ assert inputs.isEmpty();
+ return this;
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public void register(RelOptPlanner planner) {
+
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+
+ }
+
+ @Override
+ public double getRows() {
+ return ((RelOptHiveTable) table).getRowCount();
+ }
+
+ public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
+ return ((RelOptHiveTable) table).getColStat(projIndxLst);
+ }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java
new file mode 100644
index 0000000..72226e7
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.SetOp;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode.Implementor;
+
+public class HiveUnion extends Union {
+
+ public static final HiveUnionRelFactory UNION_REL_FACTORY = new HiveUnionRelFactory();
+
+ public HiveUnion(RelOptCluster cluster, RelTraitSet traits, List<RelNode> inputs) {
+ super(cluster, traits, inputs, true);
+ }
+
+ @Override
+ public SetOp copy(RelTraitSet traitSet, List<RelNode> inputs, boolean all) {
+ return new HiveUnion(this.getCluster(), traitSet, inputs);
+ }
+
+ public void implement(Implementor implementor) {
+ }
+
+ private static class HiveUnionRelFactory implements RelFactories.SetOpFactory {
+
+ @Override
+ public RelNode createSetOp(SqlKind kind, List<RelNode> inputs, boolean all) {
+ if (kind != SqlKind.UNION) {
+ throw new IllegalStateException("Expected set operator of type UNION, found: " + kind);
+ }
+ return new HiveUnion(inputs.get(0).getCluster(), inputs.get(0).getTraitSet(), inputs);
+ }
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
new file mode 100644
index 0000000..dcaf831
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.BitSet;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.plan.RelOptUtil.InputFinder;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.rules.FilterJoinRule;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+public abstract class HiveFilterJoinRule extends FilterJoinRule {
+
+ public static final HiveFilterJoinRule FILTER_ON_JOIN = new HiveFilterJoinMergeRule();
+
+ public static final HiveFilterJoinRule JOIN = new HiveFilterJoinTransposeRule();
+
+ /**
+ * Creates a PushFilterPastJoinRule with an explicit root operand.
+ */
+ protected HiveFilterJoinRule(RelOptRuleOperand operand, String id, boolean smart,
+ RelFactories.FilterFactory filterFactory, RelFactories.ProjectFactory projectFactory) {
+ super(operand, id, smart, filterFactory, projectFactory);
+ }
+
+ /**
+ * Rule that tries to push filter expressions into a join condition and into
+ * the inputs of the join.
+ */
+ public static class HiveFilterJoinMergeRule extends HiveFilterJoinRule {
+ public HiveFilterJoinMergeRule() {
+ super(RelOptRule.operand(Filter.class,
+ RelOptRule.operand(Join.class, RelOptRule.any())),
+ "HiveFilterJoinRule:filter", true, HiveFilter.DEFAULT_FILTER_FACTORY,
+ HiveProject.DEFAULT_PROJECT_FACTORY);
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ Filter filter = call.rel(0);
+ Join join = call.rel(1);
+ super.perform(call, filter, join);
+ }
+ }
+
+ public static class HiveFilterJoinTransposeRule extends HiveFilterJoinRule {
+ public HiveFilterJoinTransposeRule() {
+ super(RelOptRule.operand(Join.class, RelOptRule.any()),
+ "HiveFilterJoinRule:no-filter", true, HiveFilter.DEFAULT_FILTER_FACTORY,
+ HiveProject.DEFAULT_PROJECT_FACTORY);
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ Join join = call.rel(0);
+ super.perform(call, null, join);
+ }
+ }
+
+ /*
+ * Any predicates pushed down into joinFilters that Hive cannot evaluate as
+ * join conditions are moved back into aboveFilters, because Hive does not
+ * support non-equi join conditions.
+ */
+ @Override
+ protected void validateJoinFilters(List<RexNode> aboveFilters, List<RexNode> joinFilters,
+ Join join, JoinRelType joinType) {
+ if (joinType.equals(JoinRelType.INNER)) {
+ ListIterator<RexNode> filterIter = joinFilters.listIterator();
+ while (filterIter.hasNext()) {
+ RexNode exp = filterIter.next();
+
+ if (exp instanceof RexCall) {
+ RexCall c = (RexCall) exp;
+ boolean validHiveJoinFilter = false;
+
+ if ((c.getOperator().getKind() == SqlKind.EQUALS)) {
+ validHiveJoinFilter = true;
+ for (RexNode rn : c.getOperands()) {
+ // NOTE: Hive disallows a single operand of an equality join condition
+ // to reference columns from both the left & right inputs. Example:
+ // (r1.x + r2.x) = (r1.y + r2.y) is disallowed as a join condition.
+ if (filterRefersToBothSidesOfJoin(rn, join)) {
+ validHiveJoinFilter = false;
+ break;
+ }
+ }
+ } else if ((c.getOperator().getKind() == SqlKind.LESS_THAN)
+ || (c.getOperator().getKind() == SqlKind.GREATER_THAN)
+ || (c.getOperator().getKind() == SqlKind.LESS_THAN_OR_EQUAL)
+ || (c.getOperator().getKind() == SqlKind.GREATER_THAN_OR_EQUAL)) {
+ validHiveJoinFilter = true;
+ // NOTE: Hive disallows references to both the left & right side of the
+ // join in an inequality condition. Example: Hive disallows
+ // (r1.x < r2.x) as a join condition.
+ if (filterRefersToBothSidesOfJoin(c, join)) {
+ validHiveJoinFilter = false;
+ }
+ }
+
+ if (validHiveJoinFilter)
+ continue;
+ }
+
+ aboveFilters.add(exp);
+ filterIter.remove();
+ }
+ }
+ }
+
+ private boolean filterRefersToBothSidesOfJoin(RexNode filter, Join j) {
+ boolean refersToBothSides = false;
+
+ int joinNoOfProjects = j.getRowType().getFieldCount();
+ ImmutableBitSet filterProjs = ImmutableBitSet.FROM_BIT_SET.apply(
+ new BitSet(joinNoOfProjects));
+ ImmutableBitSet allLeftProjs = filterProjs.union(
+ ImmutableBitSet.range(0, j.getInput(0).getRowType().getFieldCount()));
+ ImmutableBitSet allRightProjs = filterProjs.union(
+ ImmutableBitSet.range(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects));
+
+ filterProjs = filterProjs.union(InputFinder.bits(filter));
+
+ if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs))
+ refersToBothSides = true;
+
+ return refersToBothSides;
+ }
+}
+
+// End HiveFilterJoinRule.java
+
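
The classification that validateJoinFilters performs for INNER joins can be summarized independently of the Rex machinery. A minimal sketch, reducing each conjunct to its comparison kind plus which join sides it touches (the enum and flags are illustrative, not part of the patch):

    public class JoinFilterClassifierSketch {
      enum Kind { EQUALS, RANGE, OTHER } // RANGE stands for <, <=, >, >=

      // operandSpansBothSides: a single operand mixes left and right columns,
      // e.g. (r1.x + r2.x) = (r1.y + r2.y); callSpansBothSides: the comparison
      // as a whole references both inputs, e.g. r1.x < r2.x.
      static boolean staysInJoinCondition(Kind kind, boolean operandSpansBothSides,
          boolean callSpansBothSides) {
        if (kind == Kind.EQUALS) return !operandSpansBothSides;
        if (kind == Kind.RANGE) return !callSpansBothSides;
        return false; // everything else is moved back above the join
      }

      public static void main(String[] args) {
        System.out.println(staysInJoinCondition(Kind.EQUALS, false, true)); // true:  f.a = d.a
        System.out.println(staysInJoinCondition(Kind.EQUALS, true, true));  // false: (f.a + d.b) = 10
        System.out.println(staysInJoinCondition(Kind.RANGE, false, true));  // false: f.x < d.x
        System.out.println(staysInJoinCondition(Kind.RANGE, false, false)); // true:  f.c < 5
      }
    }
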
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java
new file mode 100644
index 0000000..ba28055
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+public class HivePartitionPruneRule extends RelOptRule {
+
+ private final HiveConf conf;
+
+ public HivePartitionPruneRule(HiveConf conf) {
+ super(operand(HiveFilter.class, operand(HiveTableScan.class, none())));
+ this.conf = conf;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ HiveFilter filter = call.rel(0);
+ HiveTableScan tScan = call.rel(1);
+ perform(call, filter, tScan);
+ }
+
+ protected void perform(RelOptRuleCall call, Filter filter,
+ HiveTableScan tScan) {
+
+ RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
+ RexNode predicate = filter.getCondition();
+
+ Pair<RexNode, RexNode> predicates = PartitionPrune
+ .extractPartitionPredicates(filter.getCluster(), hiveTable, predicate);
+ RexNode partColExpr = predicates.left;
+ hiveTable.computePartitionList(conf, partColExpr);
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
new file mode 100644
index 0000000..8b90a15
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.rel.rules.ProjectMergeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+//Currently not used, turn this on later
+public class HiveProjectMergeRule extends ProjectMergeRule {
+ public static final HiveProjectMergeRule INSTANCE = new HiveProjectMergeRule();
+
+ public HiveProjectMergeRule() {
+ super(true, HiveProject.DEFAULT_PROJECT_FACTORY);
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java
new file mode 100644
index 0000000..2fb9a52
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+
+public class PartitionPrune {
+
+ /**
+ * Breaks the predicate into 2 pieces. The first piece contains the expressions
+ * that only reference partition columns and can be used for partition pruning;
+ * the second piece contains the predicates that are left over.
+ *
+ * @param cluster
+ * @param hiveTable
+ * @param predicate
+ * @return a Pair of expressions, each of which may be null. The first
+ * expression contains only partition columns; the second contains
+ * the remaining predicates.
+ */
+ public static Pair<RexNode, RexNode> extractPartitionPredicates(
+ RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) {
+ RexNode partitionPruningPred = predicate
+ .accept(new ExtractPartPruningPredicate(cluster, hiveTable));
+ RexNode remainingPred = predicate.accept(new ExtractRemainingPredicate(
+ cluster, partitionPruningPred));
+ return new Pair<RexNode, RexNode>(partitionPruningPred, remainingPred);
+ }
+
+ public static class ExtractPartPruningPredicate extends
+ RexVisitorImpl<RexNode> {
+
+ final RelOptHiveTable hiveTable;
+ final RelDataType rType;
+ final Set<String> partCols;
+ final RelOptCluster cluster;
+
+ public ExtractPartPruningPredicate(RelOptCluster cluster,
+ RelOptHiveTable hiveTable) {
+ super(true);
+ this.hiveTable = hiveTable;
+ rType = hiveTable.getRowType();
+ List<FieldSchema> pfs = hiveTable.getHiveTableMD().getPartCols();
+ partCols = new HashSet<String>();
+ for (FieldSchema pf : pfs) {
+ partCols.add(pf.getName());
+ }
+ this.cluster = cluster;
+ }
+
+ @Override
+ public RexNode visitLiteral(RexLiteral literal) {
+ return literal;
+ }
+
+ @Override
+ public RexNode visitInputRef(RexInputRef inputRef) {
+ RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex());
+ if (partCols.contains(f.getName())) {
+ return inputRef;
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public RexNode visitCall(RexCall call) {
+ if (!deep) {
+ return null;
+ }
+
+ List<RexNode> args = new LinkedList<RexNode>();
+ boolean argsPruned = false;
+
+ GenericUDF hiveUDF = SqlFunctionConverter.getHiveUDF(call.getOperator(),
+ call.getType(), call.operands.size());
+ if (hiveUDF != null &&
+ !FunctionRegistry.isDeterministic(hiveUDF)) {
+ return null;
+ }
+
+ for (RexNode operand : call.operands) {
+ RexNode n = operand.accept(this);
+ if (n != null) {
+ args.add(n);
+ } else {
+ argsPruned = true;
+ }
+ }
+
+ if (call.getOperator() != SqlStdOperatorTable.AND) {
+ return argsPruned ? null : call;
+ } else {
+ if (args.size() == 0) {
+ return null;
+ } else if (args.size() == 1) {
+ return args.get(0);
+ } else {
+ return cluster.getRexBuilder().makeCall(call.getOperator(), args);
+ }
+ }
+ }
+
+ }
+
+ public static class ExtractRemainingPredicate extends RexVisitorImpl<RexNode> {
+
+ List<RexNode> pruningPredicates;
+ final RelOptCluster cluster;
+
+ public ExtractRemainingPredicate(RelOptCluster cluster,
+ RexNode partPruningExpr) {
+ super(true);
+ this.cluster = cluster;
+ pruningPredicates = new ArrayList<RexNode>();
+ flattenPredicates(partPruningExpr);
+ }
+
+ private void flattenPredicates(RexNode r) {
+ if (r instanceof RexCall
+ && ((RexCall) r).getOperator() == SqlStdOperatorTable.AND) {
+ for (RexNode c : ((RexCall) r).getOperands()) {
+ flattenPredicates(c);
+ }
+ } else {
+ pruningPredicates.add(r);
+ }
+ }
+
+ @Override
+ public RexNode visitLiteral(RexLiteral literal) {
+ return literal;
+ }
+
+ @Override
+ public RexNode visitInputRef(RexInputRef inputRef) {
+ return inputRef;
+ }
+
+ @Override
+ public RexNode visitCall(RexCall call) {
+ if (!deep) {
+ return null;
+ }
+
+ if (call.getOperator() != SqlStdOperatorTable.AND) {
+ if (pruningPredicates.contains(call)) {
+ return null;
+ } else {
+ return call;
+ }
+ }
+
+ List<RexNode> args = new LinkedList<RexNode>();
+
+ for (RexNode operand : call.operands) {
+ RexNode n = operand.accept(this);
+ if (n != null) {
+ args.add(n);
+ }
+ }
+
+ if (args.size() == 0) {
+ return null;
+ } else if (args.size() == 1) {
+ return args.get(0);
+ } else {
+ return cluster.getRexBuilder().makeCall(call.getOperator(), args);
+ }
+ }
+ }
+}
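
extractPartitionPredicates splits an AND tree into a partition-pruning piece and a remainder; HivePartitionPruneRule then feeds the first piece to computePartitionList. A minimal sketch of the split, with strings standing in for RexNodes and a set lookup standing in for the row-type check (column and predicate text illustrative):

    import java.util.ArrayList;
    import java.util.List;

    public class PartitionPruneSketch {
      public static void main(String[] args) {
        // Conjuncts of: ds = '2014-01-01' AND amount > 10 AND ds <= '2014-02-01',
        // where "ds" is the only partition column.
        String[][] conjuncts = {
            {"ds = '2014-01-01'", "ds"},
            {"amount > 10", "amount"},
            {"ds <= '2014-02-01'", "ds"}};
        List<String> pruning = new ArrayList<String>();
        List<String> remaining = new ArrayList<String>();
        for (String[] c : conjuncts) {
          (c[1].equals("ds") ? pruning : remaining).add(c[0]);
        }
        System.out.println("pruning:   " + pruning);   // drives partition pruning
        System.out.println("remaining: " + remaining); // stays in the Filter
      }
    }
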
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
new file mode 100644
index 0000000..b52779c
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
@@ -0,0 +1,255 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.type.SqlTypeUtil;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
+ private final RelNode childRel;
+ private final double childCardinality;
+
+ protected FilterSelectivityEstimator(RelNode childRel) {
+ super(true);
+ this.childRel = childRel;
+ this.childCardinality = RelMetadataQuery.getRowCount(childRel);
+ }
+
+ public Double estimateSelectivity(RexNode predicate) {
+ return predicate.accept(this);
+ }
+
+ public Double visitCall(RexCall call) {
+ if (!deep) {
+ return 1.0;
+ }
+
+ /*
+ * Ignore any predicates on partition columns because we have already
+ * accounted for these in the Table row count.
+ */
+ if (isPartitionPredicate(call, this.childRel)) {
+ return 1.0;
+ }
+
+ Double selectivity = null;
+ SqlKind op = getOp(call);
+
+ switch (op) {
+ case AND: {
+ selectivity = computeConjunctionSelectivity(call);
+ break;
+ }
+
+ case OR: {
+ selectivity = computeDisjunctionSelectivity(call);
+ break;
+ }
+
+ case NOT:
+ case NOT_EQUALS: {
+ selectivity = computeNotEqualitySelectivity(call);
+ break;
+ }
+
+ case LESS_THAN_OR_EQUAL:
+ case GREATER_THAN_OR_EQUAL:
+ case LESS_THAN:
+ case GREATER_THAN: {
+ selectivity = ((double) 1 / (double) 3);
+ break;
+ }
+
+ case IN: {
+ // TODO: 1) check for duplicates 2) We assume in clause values to be
+ // present in NDV which may not be correct (Range check can find it) 3) We
+ // assume values in NDV set is uniformly distributed over col values
+ // (account for skewness - histogram).
+ selectivity = computeFunctionSelectivity(call) * (call.operands.size() - 1);
+ if (selectivity <= 0.0) {
+ selectivity = 0.10;
+ } else if (selectivity >= 1.0) {
+ selectivity = 1.0;
+ }
+ break;
+ }
+
+ default:
+ selectivity = computeFunctionSelectivity(call);
+ }
+
+ return selectivity;
+ }
+
+ /**
+ * NDV of "f1(x, y, z) != f2(p, q, r)" ->
+ * "(maxNDV(x,y,z,p,q,r) - 1)/maxNDV(x,y,z,p,q,r)".
+ *
+ *
+ * @param call
+ * @return
+ */
+ private Double computeNotEqualitySelectivity(RexCall call) {
+ double tmpNDV = getMaxNDV(call);
+
+ if (tmpNDV > 1)
+ return (tmpNDV - (double) 1) / tmpNDV;
+ else
+ return 1.0;
+ }
+
+ /**
+ * Selectivity of f(x,y,z) -> 1/maxNDV(x,y,z).
+ *
+ * Note that "=" and other generic functions use this method to find their
+ * selectivity; the range comparisons (<, <=, >, >=) are special-cased to 1/3
+ * in visitCall.
+ *
+ * @param call
+ * @return
+ */
+ private Double computeFunctionSelectivity(RexCall call) {
+ return 1 / getMaxNDV(call);
+ }
+
+ /**
+ * Disjunction selectivity -> 1 - (1 - m1/n)(1 - m2/n), where n is the total
+ * number of tuples from the child and m1, m2, ... are the expected numbers
+ * of tuples from each part of the disjunction predicate.
+ *
+ * Note that we compute m1, m2, ... by applying the selectivity of each
+ * disjunct to the cardinality of the child.
+ *
+ * @param call
+ * @return
+ */
+ private Double computeDisjunctionSelectivity(RexCall call) {
+ Double tmpCardinality;
+ Double tmpSelectivity;
+ double selectivity = 1;
+
+ for (RexNode dje : call.getOperands()) {
+ tmpSelectivity = dje.accept(this);
+ if (tmpSelectivity == null) {
+ tmpSelectivity = 0.99;
+ }
+ tmpCardinality = childCardinality * tmpSelectivity;
+
+ if (tmpCardinality > 1 && tmpCardinality < childCardinality) {
+ tmpSelectivity = (1 - tmpCardinality / childCardinality);
+ } else {
+ tmpSelectivity = 1.0;
+ }
+
+ selectivity *= tmpSelectivity;
+ }
+
+ if (selectivity < 0.0)
+ selectivity = 0.0;
+
+ return (1 - selectivity);
+ }
+
+ /**
+ * Selectivity of conjunctive predicate -> (selectivity of conjunctive
+ * element1) * (selectivity of conjunctive element2)...
+ *
+ * @param call
+ * @return
+ */
+ private Double computeConjunctionSelectivity(RexCall call) {
+ Double tmpSelectivity;
+ double selectivity = 1;
+
+ for (RexNode cje : call.getOperands()) {
+ tmpSelectivity = cje.accept(this);
+ if (tmpSelectivity != null) {
+ selectivity *= tmpSelectivity;
+ }
+ }
+
+ return selectivity;
+ }
+
+ private Double getMaxNDV(RexCall call) {
+ double tmpNDV;
+ double maxNDV = 1.0;
+ InputReferencedVisitor irv;
+
+ for (RexNode op : call.getOperands()) {
+ if (op instanceof RexInputRef) {
+ tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel,
+ ((RexInputRef) op).getIndex());
+ if (tmpNDV > maxNDV)
+ maxNDV = tmpNDV;
+ } else {
+ irv = new InputReferencedVisitor();
+ irv.apply(op);
+ for (Integer childProjIndx : irv.inputPosReferenced) {
+ tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, childProjIndx);
+ if (tmpNDV > maxNDV)
+ maxNDV = tmpNDV;
+ }
+ }
+ }
+
+ return maxNDV;
+ }
+
+ private boolean isPartitionPredicate(RexNode expr, RelNode r) {
+ if (r instanceof Project) {
+ expr = RelOptUtil.pushFilterPastProject(expr, (Project) r);
+ return isPartitionPredicate(expr, ((Project) r).getInput());
+ } else if (r instanceof Filter) {
+ return isPartitionPredicate(expr, ((Filter) r).getInput());
+ } else if (r instanceof HiveTableScan) {
+ RelOptHiveTable table = (RelOptHiveTable) ((HiveTableScan) r).getTable();
+ ImmutableBitSet cols = RelOptUtil.InputFinder.bits(expr);
+ return table.containsPartitionColumnsOnly(cols);
+ }
+ return false;
+ }
+
+ private SqlKind getOp(RexCall call) {
+ SqlKind op = call.getKind();
+
+ if (call.getKind().equals(SqlKind.OTHER_FUNCTION)
+ && SqlTypeUtil.inBooleanFamily(call.getType())) {
+ SqlOperator sqlOp = call.getOperator();
+ String opName = (sqlOp != null) ? sqlOp.getName() : "";
+ if (opName.equalsIgnoreCase("in")) {
+ op = SqlKind.IN;
+ }
+ }
+
+ return op;
+ }
+}
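
The formulas above are easy to sanity-check numerically. A minimal sketch with an assumed child cardinality and NDV (values illustrative, not from the patch):

    public class SelectivitySketch {
      public static void main(String[] args) {
        double n = 10000d;    // assumed child cardinality
        double maxNdv = 200d; // assumed max NDV over the referenced columns

        double eq = 1 / maxNdv;             // "c = 5"  -> 0.005 (default case)
        double neq = (maxNdv - 1) / maxNdv; // "c <> 5" -> 0.995
        double range = 1d / 3d;             // "c < 5"  -> 0.333...

        // Disjunction: 1 - (1 - m1/n)(1 - m2/n), with m_i = n * selectivity_i.
        double m1 = n * eq, m2 = n * range;
        double or = 1 - (1 - m1 / n) * (1 - m2 / n); // "c = 5 OR c < 5" -> ~0.337
        System.out.println(eq + " " + neq + " " + range + " " + or);
      }
    }
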
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
new file mode 100644
index 0000000..1220401
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdDistinctRowCount;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveRelMdDistinctRowCount extends RelMdDistinctRowCount {
+
+ private static final HiveRelMdDistinctRowCount INSTANCE =
+ new HiveRelMdDistinctRowCount();
+
+ public static final RelMetadataProvider SOURCE = ChainedRelMetadataProvider
+ .of(ImmutableList.of(
+
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.DISTINCT_ROW_COUNT.method, INSTANCE),
+
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.CUMULATIVE_COST.method, INSTANCE)));
+
+ private HiveRelMdDistinctRowCount() {
+ }
+
+ // Catch-all rule when none of the others apply.
+ @Override
+ public Double getDistinctRowCount(RelNode rel, ImmutableBitSet groupKey,
+ RexNode predicate) {
+ if (rel instanceof HiveTableScan) {
+ return getDistinctRowCount((HiveTableScan) rel, groupKey, predicate);
+ }
+ /*
+ * For now use Calcite's default formulas for propagating NDVs up the query
+ * tree.
+ */
+ return super.getDistinctRowCount(rel, groupKey, predicate);
+ }
+
+ private Double getDistinctRowCount(HiveTableScan htRel, ImmutableBitSet groupKey,
+ RexNode predicate) {
+ List<Integer> projIndxLst = HiveCalciteUtil
+ .translateBitSetToProjIndx(groupKey);
+ List<ColStatistics> colStats = htRel.getColStat(projIndxLst);
+ Double noDistinctRows = 1.0;
+ for (ColStatistics cStat : colStats) {
+ noDistinctRows *= cStat.getCountDistint();
+ }
+
+ return Math.min(noDistinctRows, htRel.getRows());
+ }
+
+ public static Double getDistinctRowCount(RelNode r, int indx) {
+ ImmutableBitSet bitSetOfRqdProj = ImmutableBitSet.of(indx);
+ return RelMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r
+ .getCluster().getRexBuilder().makeLiteral(true));
+ }
+
+ @Override
+ public Double getDistinctRowCount(Join rel, ImmutableBitSet groupKey,
+ RexNode predicate) {
+ if (rel instanceof HiveJoin) {
+ HiveJoin hjRel = (HiveJoin) rel;
+ //TODO: Improve this
+ if (hjRel.isLeftSemiJoin()) {
+ return RelMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey,
+ rel.getCluster().getRexBuilder().makeLiteral(true));
+ } else {
+ return RelMdUtil.getJoinDistinctRowCount(rel, rel.getJoinType(),
+ groupKey, predicate, true);
+ }
+ }
+
+ return RelMetadataQuery.getDistinctRowCount(rel, groupKey, predicate);
+ }
+
+ /*
+ * Favor Broad Plans over Deep Plans.
+ */
+ public RelOptCost getCumulativeCost(HiveJoin rel) {
+ RelOptCost cost = RelMetadataQuery.getNonCumulativeCost(rel);
+ List<RelNode> inputs = rel.getInputs();
+ RelOptCost maxICost = HiveCost.ZERO;
+ for (RelNode input : inputs) {
+ RelOptCost iCost = RelMetadataQuery.getCumulativeCost(input);
+ if (maxICost.isLt(iCost)) {
+ maxICost = iCost;
+ }
+ }
+ return cost.plus(maxICost);
+ }
+}
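
[Reviewer note] The HiveTableScan override above estimates the group NDV as the product of the per-column NDVs, fenced at the scan's row count. A minimal standalone sketch of that arithmetic; all numbers here are hypothetical, not from any Hive API:

    // Product of per-column NDVs, capped at the table's row count,
    // mirroring getDistinctRowCount(HiveTableScan, ...) above.
    public class DistinctRowCountSketch {
      public static void main(String[] args) {
        double rowCount = 1000000d;                  // hypothetical scan cardinality
        double[] columnNdvs = { 365d, 12d, 50d };    // hypothetical per-column NDVs
        double noDistinctRows = 1.0;
        for (double ndv : columnNdvs) {
          noDistinctRows *= ndv;
        }
        // 365 * 12 * 50 = 219000, which survives the 1000000-row fence
        System.out.println(Math.min(noDistinctRows, rowCount));
      }
    }
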
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
new file mode 100644
index 0000000..dabbe28
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
@@ -0,0 +1,439 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelVisitor;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.SemiJoin;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdRowCount;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+public class HiveRelMdRowCount extends RelMdRowCount {
+
+ protected static final Log LOG = LogFactory.getLog(HiveRelMdRowCount.class.getName());
+
+
+ public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider
+ .reflectiveSource(BuiltInMethod.ROW_COUNT.method, new HiveRelMdRowCount());
+
+ protected HiveRelMdRowCount() {
+ super();
+ }
+
+ public Double getRowCount(Join join) {
+ PKFKRelationInfo pkfk = analyzeJoinForPKFK(join);
+ if (pkfk != null) {
+ double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor);
+ selectivity = Math.min(1.0, selectivity);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Identified Primary - Foreign Key relation:");
+ LOG.debug(RelOptUtil.toString(join));
+ LOG.debug(pkfk);
+ }
+ return pkfk.fkInfo.rowCount * selectivity;
+ }
+ return join.getRows();
+ }
+
+ public Double getRowCount(SemiJoin rel) {
+ PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel);
+ if (pkfk != null) {
+ double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor);
+ selectivity = Math.min(1.0, selectivity);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Identified Primary - Foreign Key relation:");
+ LOG.debug(RelOptUtil.toString(rel));
+ LOG.debug(pkfk);
+ }
+ return pkfk.fkInfo.rowCount * selectivity;
+ }
+ return super.getRowCount(rel);
+ }
+
+ static class PKFKRelationInfo {
+ public final int fkSide;
+ public final double ndvScalingFactor;
+ public final FKSideInfo fkInfo;
+ public final PKSideInfo pkInfo;
+ public final boolean isPKSideSimple;
+
+ PKFKRelationInfo(int fkSide,
+ FKSideInfo fkInfo,
+ PKSideInfo pkInfo,
+ double ndvScalingFactor,
+ boolean isPKSideSimple) {
+ this.fkSide = fkSide;
+ this.fkInfo = fkInfo;
+ this.pkInfo = pkInfo;
+ this.ndvScalingFactor = ndvScalingFactor;
+ this.isPKSideSimple = isPKSideSimple;
+ }
+
+ public String toString() {
+ return String.format(
+ "Primary - Foreign Key join:\n\tfkSide = %d\n\tFKInfo:%s\n" +
+ "\tPKInfo:%s\n\tisPKSideSimple:%s\n\tNDV Scaling Factor:%.2f\n",
+ fkSide,
+ fkInfo,
+ pkInfo,
+ isPKSideSimple,
+ ndvScalingFactor);
+ }
+ }
+
+ static class FKSideInfo {
+ public final double rowCount;
+ public final double distinctCount;
+ public FKSideInfo(double rowCount, double distinctCount) {
+ this.rowCount = rowCount;
+ this.distinctCount = distinctCount;
+ }
+
+ public String toString() {
+ return String.format("FKInfo(rowCount=%.2f,ndv=%.2f)", rowCount, distinctCount);
+ }
+ }
+
+ static class PKSideInfo extends FKSideInfo {
+ public final double selectivity;
+ public PKSideInfo(double rowCount, double distinctCount, double selectivity) {
+ super(rowCount, distinctCount);
+ this.selectivity = selectivity;
+ }
+
+ public String toString() {
+ return String.format("PKInfo(rowCount=%.2f,ndv=%.2f,selectivity=%.2f)", rowCount, distinctCount,selectivity);
+ }
+ }
+
+ /*
+ * For T1 join T2 on T1.x = T2.y, if we identify 'y' as a key of T2 then we can
+ * infer the join cardinality as: rowCount(T1) * selectivity(T2), i.e. this is
+ * like a SemiJoin where the T1(Fact side/FK side) is filtered by a factor
+ * based on the Selectivity of the PK/Dim table side.
+ *
+ * 1. If both T1.x and T2.y are keys then use the larger one as the PK side.
+ * 2. In case of outer Joins: a) The FK side should be the Null Preserving
+ * side. It doesn't make sense to apply this heuristic in case of Dim loj Fact
+ * or Fact roj Dim b) The selectivity factor applied on the Fact Table should
+ * be 1.
+ */
+ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) {
+
+ RelNode left = joinRel.getInputs().get(0);
+ RelNode right = joinRel.getInputs().get(1);
+
+ final List<RexNode> initJoinFilters = RelOptUtil.conjunctions(joinRel
+ .getCondition());
+
+ /*
+ * No joining condition.
+ */
+ if (initJoinFilters.isEmpty()) {
+ return null;
+ }
+
+ List<RexNode> leftFilters = new ArrayList<RexNode>();
+ List<RexNode> rightFilters = new ArrayList<RexNode>();
+ List<RexNode> joinFilters = new ArrayList<RexNode>(initJoinFilters);
+
+ // @todo: remove this. 8/28/14 hb
+ // for now adding because RelOptUtil.classifyFilters has an assertion about
+ // column counts that is not true for semiJoins.
+ if (joinRel instanceof SemiJoin) {
+ return null;
+ }
+
+ RelOptUtil.classifyFilters(joinRel, joinFilters, joinRel.getJoinType(),
+ false, !joinRel.getJoinType().generatesNullsOnRight(), !joinRel
+ .getJoinType().generatesNullsOnLeft(), joinFilters, leftFilters,
+ rightFilters);
+
+ Pair<Integer, Integer> joinCols = canHandleJoin(joinRel, leftFilters,
+ rightFilters, joinFilters);
+ if (joinCols == null) {
+ return null;
+ }
+ int leftColIdx = joinCols.left;
+ int rightColIdx = joinCols.right;
+
+ RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
+ RexNode leftPred = RexUtil
+ .composeConjunction(rexBuilder, leftFilters, true);
+ RexNode rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters,
+ true);
+ ImmutableBitSet lBitSet = ImmutableBitSet.of(leftColIdx);
+ ImmutableBitSet rBitSet = ImmutableBitSet.of(rightColIdx);
+
+ /*
+ * If the form is Dim loj F or Fact roj Dim or Dim semij Fact then return
+ * null.
+ */
+ boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel
+ .getJoinType() == JoinRelType.RIGHT)
+ && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left);
+ boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel
+ .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right);
+
+ if (!leftIsKey && !rightIsKey) {
+ return null;
+ }
+
+ double leftRowCount = RelMetadataQuery.getRowCount(left);
+ double rightRowCount = RelMetadataQuery.getRowCount(right);
+
+ if (leftIsKey && rightIsKey) {
+ if (rightRowCount < leftRowCount) {
+ leftIsKey = false;
+ }
+ }
+
+ int pkSide = leftIsKey ? 0 : rightIsKey ? 1 : -1;
+
+ boolean isPKSideSimpleTree = pkSide != -1 ?
+ IsSimpleTreeOnJoinKey.check(
+ pkSide == 0 ? left : right,
+ pkSide == 0 ? leftColIdx : rightColIdx) : false;
+
+ double leftNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(left, lBitSet, leftPred) : -1;
+ double rightNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(right, rBitSet, rightPred) : -1;
+
+ /*
+ * If the NDVs of the PK and FK sides don't match, and the PK side is a filter
+ * on the Key column then scale the NDV on the FK side.
+ *
+ * As described by Peter Boncz: http://databasearchitects.blogspot.com/
+ * in such cases we can be off by a large margin in the Join cardinality
+ * estimate. The example he provides is the join of StoreSales and DateDim
+ * on the TPCDS dataset. Since the DateDim is populated for 20 years into
+ * the future, while the StoreSales only has 5 years worth of data, there
+ * are 40 times fewer distinct dates in StoreSales.
+ *
+ * In general it is hard to infer the range for the foreign key on an
+ * arbitrary expression. For example, the NDV of DayOfWeek is the same
+ * irrespective of the NDV on the number of unique days, whereas the
+ * NDV of Quarters has the same ratio as the NDV on the keys.
+ *
+ * But for expressions that apply only on columns that have the same NDV
+ * as the key (implying that they are alternate keys) we can apply the
+ * ratio. So in the case of StoreSales - DateDim joins for predicate on the
+ * d_date column we can apply the scaling factor.
+ */
+ double ndvScalingFactor = 1.0;
+ if (isPKSideSimpleTree) {
+ ndvScalingFactor = pkSide == 0 ? leftNDV / rightNDV : rightNDV / leftNDV;
+ }
+
+ if (pkSide == 0) {
+ FKSideInfo fkInfo = new FKSideInfo(rightRowCount,
+ rightNDV);
+ double pkSelectivity = pkSelectivity(joinRel, true, left, leftRowCount);
+ PKSideInfo pkInfo = new PKSideInfo(leftRowCount,
+ leftNDV,
+ joinRel.getJoinType().generatesNullsOnRight() ? 1.0 :
+ pkSelectivity);
+
+ return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree);
+ }
+
+ if (pkSide == 1) {
+ FKSideInfo fkInfo = new FKSideInfo(leftRowCount,
+ leftNDV);
+ double pkSelectivity = pkSelectivity(joinRel, false, right, rightRowCount);
+ PKSideInfo pkInfo = new PKSideInfo(rightRowCount,
+ rightNDV,
+ joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 :
+ pkSelectivity);
+
+ return new PKFKRelationInfo(0, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree);
+ }
+
+ return null;
+ }
+
+ private static double pkSelectivity(Join joinRel, boolean leftChild,
+ RelNode child,
+ double childRowCount) {
+ if ((leftChild && joinRel.getJoinType().generatesNullsOnRight()) ||
+ (!leftChild && joinRel.getJoinType().generatesNullsOnLeft())) {
+ return 1.0;
+ } else {
+ HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true);
+ if (tScan != null) {
+ double tRowCount = RelMetadataQuery.getRowCount(tScan);
+ return childRowCount / tRowCount;
+ } else {
+ return 1.0;
+ }
+ }
+ }
+
+ private static boolean isKey(ImmutableBitSet c, RelNode rel) {
+ boolean isKey = false;
+ Set<ImmutableBitSet> keys = RelMetadataQuery.getUniqueKeys(rel);
+ if (keys != null) {
+ for (ImmutableBitSet key : keys) {
+ if (key.equals(c)) {
+ isKey = true;
+ break;
+ }
+ }
+ }
+ return isKey;
+ }
+
+ /*
+ * 1. Join condition must be an Equality Predicate.
+ * 2. Both sides must reference exactly 1 column.
+ * 3. If needed, flip the columns.
+ */
+ private static Pair<Integer, Integer> canHandleJoin(Join joinRel,
+ List<RexNode> leftFilters, List<RexNode> rightFilters,
+ List<RexNode> joinFilters) {
+
+ /*
+ * If after classifying filters there is more than 1 joining predicate, we
+ * don't handle this. Return null.
+ */
+ if (joinFilters.size() != 1) {
+ return null;
+ }
+
+ RexNode joinCond = joinFilters.get(0);
+
+ int leftColIdx;
+ int rightColIdx;
+
+ if (!(joinCond instanceof RexCall)) {
+ return null;
+ }
+
+ if (((RexCall) joinCond).getOperator() != SqlStdOperatorTable.EQUALS) {
+ return null;
+ }
+
+ ImmutableBitSet leftCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(0));
+ ImmutableBitSet rightCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(1));
+
+ if (leftCols.cardinality() != 1 || rightCols.cardinality() != 1 ) {
+ return null;
+ }
+
+ int nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size();
+ int nFieldsRight = joinRel.getRight().getRowType().getFieldList().size();
+ int nSysFields = joinRel.getSystemFieldList().size();
+ ImmutableBitSet rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft,
+ nSysFields + nFieldsLeft + nFieldsRight);
+ /*
+ * flip column references if join condition specified in reverse order to
+ * join sources.
+ */
+ if (rightFieldsBitSet.contains(leftCols)) {
+ ImmutableBitSet t = leftCols;
+ leftCols = rightCols;
+ rightCols = t;
+ }
+
+ leftColIdx = leftCols.nextSetBit(0) - nSysFields;
+ rightColIdx = rightCols.nextSetBit(0) - (nSysFields + nFieldsLeft);
+
+ return new Pair<Integer, Integer>(leftColIdx, rightColIdx);
+ }
+
+ private static class IsSimpleTreeOnJoinKey extends RelVisitor {
+
+ int joinKey;
+ boolean simpleTree;
+
+ static boolean check(RelNode r, int joinKey) {
+ IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey);
+ v.go(r);
+ return v.simpleTree;
+ }
+
+ IsSimpleTreeOnJoinKey(int joinKey) {
+ super();
+ this.joinKey = joinKey;
+ simpleTree = true;
+ }
+
+ @Override
+ public void visit(RelNode node, int ordinal, RelNode parent) {
+
+ if (node instanceof HepRelVertex) {
+ node = ((HepRelVertex) node).getCurrentRel();
+ }
+
+ if (node instanceof TableScan) {
+ simpleTree = true;
+ } else if (node instanceof Project) {
+ simpleTree = isSimple((Project) node);
+ } else if (node instanceof Filter) {
+ simpleTree = isSimple((Filter) node);
+ } else {
+ simpleTree = false;
+ }
+
+ if (simpleTree) {
+ super.visit(node, ordinal, parent);
+ }
+ }
+
+ private boolean isSimple(Project project) {
+ RexNode r = project.getProjects().get(joinKey);
+ if (r instanceof RexInputRef) {
+ joinKey = ((RexInputRef) r).getIndex();
+ return true;
+ }
+ return false;
+ }
+
+ private boolean isSimple(Filter filter) {
+ ImmutableBitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition());
+ return isKey(condBits, filter);
+ }
+
+ }
+
+}
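
[Reviewer note] To make the PK/FK formula concrete — rowCount(FK side) * min(1.0, pkSelectivity * ndvScalingFactor) — here is a standalone sketch using the StoreSales/DateDim shape from the comment in analyzeJoinForPKFK; every number is invented for illustration:

    // Worked example of the PK/FK join cardinality estimate above.
    public class PkFkCardinalitySketch {
      public static void main(String[] args) {
        double fkRowCount = 100000000d;           // fact side (StoreSales), hypothetical
        double pkRowCount = 73049d;               // dim side (DateDim, ~20 years of dates)
        double pkNdv = 73049d;                    // every DateDim row is a distinct date
        double fkNdv = 1826d;                     // only ~5 years of dates appear in the facts
        double pkSelectivity = 365d / pkRowCount; // predicate keeps one year of DateDim
        double ndvScalingFactor = pkNdv / fkNdv;  // ~40x correction from the comment
        double selectivity = Math.min(1.0, pkSelectivity * ndvScalingFactor);
        System.out.println(fkRowCount * selectivity); // ~2.0E7: one of five fact years
      }
    }
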
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
new file mode 100644
index 0000000..960ec40
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdSelectivity;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+import com.google.common.collect.ImmutableMap;
+
+public class HiveRelMdSelectivity extends RelMdSelectivity {
+ public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.SELECTIVITY.method,
+ new HiveRelMdSelectivity());
+
+ protected HiveRelMdSelectivity() {
+ super();
+ }
+
+ public Double getSelectivity(HiveTableScan t, RexNode predicate) {
+ if (predicate != null) {
+ FilterSelectivityEstimator filterSelEstmator = new FilterSelectivityEstimator(t);
+ return filterSelEstmator.estimateSelectivity(predicate);
+ }
+
+ return 1.0;
+ }
+
+ public Double getSelectivity(HiveJoin j, RexNode predicate) {
+ if (j.getJoinType().equals(JoinRelType.INNER)) {
+ return computeInnerJoinSelectivity(j, predicate);
+ }
+ return 1.0;
+ }
+
+ private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) {
+ double ndvCrossProduct = 1;
+ Pair<Boolean, RexNode> predInfo =
+ getCombinedPredicateForJoin(j, predicate);
+ if (!predInfo.getKey()) {
+ return
+ new FilterSelectivityEstimator(j).
+ estimateSelectivity(predInfo.getValue());
+ }
+
+ RexNode combinedPredicate = predInfo.getValue();
+ JoinPredicateInfo jpi = JoinPredicateInfo.constructJoinPredicateInfo(j,
+ combinedPredicate);
+ ImmutableMap.Builder<Integer, Double> colStatMapBuilder = ImmutableMap
+ .builder();
+ ImmutableMap<Integer, Double> colStatMap;
+ int rightOffSet = j.getLeft().getRowType().getFieldCount();
+
+ // 1. Update Col Stats Map with col stats for columns from left side of
+ // Join which are part of join keys
+ for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
+ colStatMapBuilder.put(ljk,
+ HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), ljk));
+ }
+
+ // 2. Update Col Stats Map with col stats for columns from right side of
+ // Join which are part of join keys
+ for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
+ colStatMapBuilder.put(rjk + rightOffSet,
+ HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), rjk));
+ }
+ colStatMap = colStatMapBuilder.build();
+
+ // 3. Walk through the Join Condition Building NDV for selectivity
+ // NDV of the join can not exceed the cardinality of cross join.
+ List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements();
+ int noOfPE = peLst.size();
+ if (noOfPE > 0) {
+ ndvCrossProduct = exponentialBackoff(peLst, colStatMap);
+
+ if (j.isLeftSemiJoin())
+ ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()),
+ ndvCrossProduct);
+ else
+ ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft())
+ * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct);
+ }
+
+ // 4. Join Selectivity = 1/NDV
+ return (1 / ndvCrossProduct);
+ }
+
+ // 3.2 If there is more than one conjunctive predicate element, walk
+ // through them one by one and compute the cross product of NDVs. The
+ // cross product is computed by multiplying the largest NDV of all the
+ // conjunctive predicate elements with the degraded NDVs of the rest of
+ // the elements; NDV is degraded using a log function. Finally,
+ // ndvCrossProduct is fenced at the join cross product to ensure that
+ // the NDV can not exceed the worst-case join cardinality.
+ // The NDV of a conjunctive predicate element is the max NDV of all
+ // arguments to its lhs and rhs expressions.
+ // NDV(JoinCondition) = min(left cardinality * right cardinality,
+ // ndvCrossProduct(JoinCondition))
+ // ndvCrossProduct(JoinCondition) = ndv(pex)*log(ndv(pe1))*log(ndv(pe2))
+ // where pex is the predicate element of the join condition with max ndv.
+ // ndv(pe) = max(NDV(left.Expr), NDV(right.Expr))
+ // NDV(expr) = max(NDV(expr args))
+ protected double logSmoothing(List<JoinLeafPredicateInfo> peLst,
+ ImmutableMap<Integer, Double> colStatMap) {
+ int noOfPE = peLst.size();
+ double ndvCrossProduct = getMaxNDVForJoinSelectivity(peLst.get(0), colStatMap);
+ if (noOfPE > 1) {
+ double maxNDVSoFar = ndvCrossProduct;
+ double ndvToBeSmoothed;
+ double tmpNDV;
+
+ for (int i = 1; i < noOfPE; i++) {
+ tmpNDV = getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap);
+ if (tmpNDV > maxNDVSoFar) {
+ ndvToBeSmoothed = maxNDVSoFar;
+ maxNDVSoFar = tmpNDV;
+ ndvCrossProduct = (ndvCrossProduct / ndvToBeSmoothed) * tmpNDV;
+ } else {
+ ndvToBeSmoothed = tmpNDV;
+ }
+ // TODO: revisit the fence
+ if (ndvToBeSmoothed > 3)
+ ndvCrossProduct *= Math.log(ndvToBeSmoothed);
+ else
+ ndvCrossProduct *= ndvToBeSmoothed;
+ }
+ }
+ return ndvCrossProduct;
+ }
+
+ /*
+ * a) Order predicates based on ndv in descending order. b) ndvCrossProduct =
+ * ndv(pe0) * ndv(pe1) ^(1/2) * ndv(pe2) ^(1/4) * ndv(pe3) ^(1/8) ...
+ */
+ protected double exponentialBackoff(List<JoinLeafPredicateInfo> peLst,
+ ImmutableMap<Integer, Double> colStatMap) {
+ int noOfPE = peLst.size();
+ List<Double> ndvs = new ArrayList<Double>(noOfPE);
+ for (int i = 0; i < noOfPE; i++) {
+ ndvs.add(getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap));
+ }
+ Collections.sort(ndvs);
+ Collections.reverse(ndvs);
+ double ndvCrossProduct = 1.0;
+ for (int i = 0; i < ndvs.size(); i++) {
+ double n = Math.pow(ndvs.get(i), Math.pow(1 / 2.0, i));
+ ndvCrossProduct *= n;
+ }
+ return ndvCrossProduct;
+ }
+
+ /**
+ *
+ * @param j
+ * @param additionalPredicate
+ * @return if predicate is the join condition return (true, joinCond)
+ * else return (false, minusPred)
+ */
+ private Pair<Boolean, RexNode> getCombinedPredicateForJoin(HiveJoin j, RexNode additionalPredicate) {
+ RexNode minusPred = RelMdUtil.minusPreds(j.getCluster().getRexBuilder(), additionalPredicate,
+ j.getCondition());
+
+ if (minusPred != null) {
+ List<RexNode> minusList = new ArrayList<RexNode>();
+ minusList.add(j.getCondition());
+ minusList.add(minusPred);
+
+ return new Pair<Boolean, RexNode>(false, minusPred);
+ }
+
+ return new Pair<Boolean, RexNode>(true, j.getCondition());
+ }
+
+ /**
+ * Compute Max NDV to determine Join Selectivity.
+ *
+ * @param jlpi
+ * @param colStatMap
+ * Immutable Map of Projection Index (in Join Schema) to Column Stat
+ * @return the max NDV across the join key projections
+ */
+ private static Double getMaxNDVForJoinSelectivity(JoinLeafPredicateInfo jlpi,
+ ImmutableMap<Integer, Double> colStatMap) {
+ Double maxNDVSoFar = 1.0;
+
+ maxNDVSoFar = getMaxNDVFromProjections(colStatMap,
+ jlpi.getProjsFromLeftPartOfJoinKeysInJoinSchema(), maxNDVSoFar);
+ maxNDVSoFar = getMaxNDVFromProjections(colStatMap,
+ jlpi.getProjsFromRightPartOfJoinKeysInJoinSchema(), maxNDVSoFar);
+
+ return maxNDVSoFar;
+ }
+
+ private static Double getMaxNDVFromProjections(Map<Integer, Double> colStatMap,
+ Set<Integer> projectionSet, Double defaultMaxNDV) {
+ Double colNDV = null;
+ Double maxNDVSoFar = defaultMaxNDV;
+
+ for (Integer projIndx : projectionSet) {
+ colNDV = colStatMap.get(projIndx);
+ if (colNDV > maxNDVSoFar)
+ maxNDVSoFar = colNDV;
+ }
+
+ return maxNDVSoFar;
+ }
+
+}
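
[Reviewer note] The exponentialBackoff combination above is easy to check by hand: sort the per-predicate NDVs in descending order, multiply ndv(pe0) * ndv(pe1)^(1/2) * ndv(pe2)^(1/4) * ..., and take the reciprocal for the join selectivity. A standalone sketch with made-up NDVs:

    import java.util.Arrays;

    // Exponential-backoff NDV combination for a three-key equi-join.
    public class ExponentialBackoffSketch {
      public static void main(String[] args) {
        double[] ndvs = { 10000d, 400d, 25d }; // hypothetical per-predicate NDVs
        Arrays.sort(ndvs);                     // ascending; we walk it backwards
        double ndvCrossProduct = 1.0;
        for (int i = 0; i < ndvs.length; i++) {
          // exponent halves at each step: 1, 1/2, 1/4, ...
          double ndv = ndvs[ndvs.length - 1 - i];
          ndvCrossProduct *= Math.pow(ndv, Math.pow(0.5, i));
        }
        // 10000 * 400^(1/2) * 25^(1/4) ~= 447214; selectivity is its reciprocal
        System.out.println(1.0 / ndvCrossProduct);
      }
    }
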
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
new file mode 100644
index 0000000..95515b2
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.metadata.BuiltInMetadata;
+import org.apache.calcite.rel.metadata.Metadata;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdUniqueKeys;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BitSets;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+import com.google.common.base.Function;
+
+public class HiveRelMdUniqueKeys {
+
+ public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider
+ .reflectiveSource(BuiltInMethod.UNIQUE_KEYS.method,
+ new HiveRelMdUniqueKeys());
+
+ /*
+ * Infer Uniqueness if:
+ * - rowCount(col) = ndv(col)
+ * - TBD for numerics: max(col) - min(col) = rowCount(col)
+ *
+ * Why are we intercepting Project and not TableScan? Because if we
+ * have a method for TableScan, it will not know which columns to check for.
+ * Inferring Uniqueness for all columns is very expensive right now. The flip
+ * side of doing this is, it only works post Field Trimming.
+ */
+ public Set<ImmutableBitSet> getUniqueKeys(Project rel, boolean ignoreNulls) {
+
+ HiveTableScan tScan = getTableScan(rel.getInput(), false);
+
+ if ( tScan == null ) {
+ Function<RelNode, Metadata> fn = RelMdUniqueKeys.SOURCE.apply(
+ rel.getClass(), BuiltInMetadata.UniqueKeys.class);
+ return ((BuiltInMetadata.UniqueKeys) fn.apply(rel))
+ .getUniqueKeys(ignoreNulls);
+ }
+
+ Map<Integer, Integer> posMap = new HashMap<Integer, Integer>();
+ int projectPos = 0;
+ int colStatsPos = 0;
+
+ BitSet projectedCols = new BitSet();
+ for (RexNode r : rel.getProjects()) {
+ if (r instanceof RexInputRef) {
+ projectedCols.set(((RexInputRef) r).getIndex());
+ posMap.put(colStatsPos, projectPos);
+ colStatsPos++;
+ }
+ projectPos++;
+ }
+
+ double numRows = tScan.getRows();
+ List<ColStatistics> colStats = tScan.getColStat(BitSets
+ .toList(projectedCols));
+ Set<ImmutableBitSet> keys = new HashSet<ImmutableBitSet>();
+
+ colStatsPos = 0;
+ for (ColStatistics cStat : colStats) {
+ boolean isKey = false;
+ if (cStat.getCountDistint() >= numRows) {
+ isKey = true;
+ }
+ if ( !isKey && cStat.getRange() != null &&
+ cStat.getRange().maxValue != null &&
+ cStat.getRange().minValue != null) {
+ double r = cStat.getRange().maxValue.doubleValue() -
+ cStat.getRange().minValue.doubleValue() + 1;
+ isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON);
+ }
+ if ( isKey ) {
+ ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos));
+ keys.add(key);
+ }
+ colStatsPos++;
+ }
+
+ return keys;
+ }
+
+ /*
+ * Traverse a path of Filters and Projects to get to the TableScan.
+ * In the unique-keys case, stop if you reach a Project; it will be handled
+ * by the invocation on the Project.
+ * In the case of getting the base rowCount of a path, keep going past a Project.
+ */
+ static HiveTableScan getTableScan(RelNode r, boolean traverseProject) {
+
+ while (r != null && !(r instanceof HiveTableScan)) {
+ if (r instanceof HepRelVertex) {
+ r = ((HepRelVertex) r).getCurrentRel();
+ } else if (r instanceof Filter) {
+ r = ((Filter) r).getInput();
+ } else if (traverseProject && r instanceof Project) {
+ r = ((Project) r).getInput();
+ } else {
+ r = null;
+ }
+ }
+ return r == null ? null : (HiveTableScan) r;
+ }
+
+}
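
[Reviewer note] The uniqueness tests in getUniqueKeys reduce to two checks per projected column: NDV at least the row count, or a dense value range whose width equals the row count (within RelOptUtil.EPSILON). A self-contained sketch of that logic, with hypothetical stats and 1e-5 standing in for the epsilon:

    // Per-column uniqueness tests mirroring getUniqueKeys above.
    public class UniqueKeySketch {
      static boolean isUniqueColumn(double rowCount, double ndv, Double min, Double max) {
        if (ndv >= rowCount) {
          return true; // as many distinct values as rows
        }
        if (min != null && max != null) {
          double range = max - min + 1;
          return Math.abs(rowCount - range) < 1e-5; // dense range, e.g. a surrogate id
        }
        return false;
      }

      public static void main(String[] args) {
        System.out.println(isUniqueColumn(1000d, 1000d, null, null)); // true: NDV test
        System.out.println(isUniqueColumn(1000d, 800d, 1d, 1000d));   // true: range test
        System.out.println(isUniqueColumn(1000d, 800d, 1d, 900d));    // false
      }
    }
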
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
new file mode 100644
index 0000000..e6e6fe3
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -0,0 +1,254 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+
+import org.apache.calcite.avatica.ByteString;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+
+class ASTBuilder {
+
+ static ASTBuilder construct(int tokenType, String text) {
+ ASTBuilder b = new ASTBuilder();
+ b.curr = createAST(tokenType, text);
+ return b;
+ }
+
+ static ASTNode createAST(int tokenType, String text) {
+ return (ASTNode) ParseDriver.adaptor.create(tokenType, text);
+ }
+
+ static ASTNode destNode() {
+ return ASTBuilder
+ .construct(HiveParser.TOK_DESTINATION, "TOK_DESTINATION")
+ .add(
+ ASTBuilder.construct(HiveParser.TOK_DIR, "TOK_DIR").add(HiveParser.TOK_TMP_FILE,
+ "TOK_TMP_FILE")).node();
+ }
+
+ static ASTNode table(TableScan scan) {
+ RelOptHiveTable hTbl = (RelOptHiveTable) scan.getTable();
+ ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_TABREF, "TOK_TABREF").add(
+ ASTBuilder.construct(HiveParser.TOK_TABNAME, "TOK_TABNAME")
+ .add(HiveParser.Identifier, hTbl.getHiveTableMD().getDbName())
+ .add(HiveParser.Identifier, hTbl.getHiveTableMD().getTableName()));
+
+ // NOTE: Calcite considers tables to be equal if their names are the same. Hence
+ // we need to provide Calcite the fully qualified table name (dbname.tblname)
+ // and not the user-provided aliases.
+ // However, in Hive a DB name cannot appear in the select list; in a join
+ // where table names differ only in DB name, Hive would require the user
+ // to introduce explicit aliases for the tables.
+ b.add(HiveParser.Identifier, hTbl.getTableAlias());
+ return b.node();
+ }
+
+ static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond,
+ boolean semiJoin) {
+ ASTBuilder b = null;
+
+ switch (joinType) {
+ case INNER:
+ if (semiJoin) {
+ b = ASTBuilder.construct(HiveParser.TOK_LEFTSEMIJOIN, "TOK_LEFTSEMIJOIN");
+ } else {
+ b = ASTBuilder.construct(HiveParser.TOK_JOIN, "TOK_JOIN");
+ }
+ break;
+ case LEFT:
+ b = ASTBuilder.construct(HiveParser.TOK_LEFTOUTERJOIN, "TOK_LEFTOUTERJOIN");
+ break;
+ case RIGHT:
+ b = ASTBuilder.construct(HiveParser.TOK_RIGHTOUTERJOIN, "TOK_RIGHTOUTERJOIN");
+ break;
+ case FULL:
+ b = ASTBuilder.construct(HiveParser.TOK_FULLOUTERJOIN, "TOK_FULLOUTERJOIN");
+ break;
+ }
+
+ b.add(left).add(right).add(cond);
+ return b.node();
+ }
+
+ static ASTNode subQuery(ASTNode qry, String alias) {
+ return ASTBuilder.construct(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY").add(qry)
+ .add(HiveParser.Identifier, alias).node();
+ }
+
+ static ASTNode qualifiedName(String tableName, String colName) {
+ ASTBuilder b = ASTBuilder
+ .construct(HiveParser.DOT, ".")
+ .add(
+ ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add(
+ HiveParser.Identifier, tableName)).add(HiveParser.Identifier, colName);
+ return b.node();
+ }
+
+ static ASTNode unqualifiedName(String colName) {
+ ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add(
+ HiveParser.Identifier, colName);
+ return b.node();
+ }
+
+ static ASTNode where(ASTNode cond) {
+ return ASTBuilder.construct(HiveParser.TOK_WHERE, "TOK_WHERE").add(cond).node();
+ }
+
+ static ASTNode having(ASTNode cond) {
+ return ASTBuilder.construct(HiveParser.TOK_HAVING, "TOK_HAVING").add(cond).node();
+ }
+
+ static ASTNode limit(Object value) {
+ return ASTBuilder.construct(HiveParser.TOK_LIMIT, "TOK_LIMIT")
+ .add(HiveParser.Number, value.toString()).node();
+ }
+
+ static ASTNode selectExpr(ASTNode expr, String alias) {
+ return ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR").add(expr)
+ .add(HiveParser.Identifier, alias).node();
+ }
+
+ static ASTNode literal(RexLiteral literal) {
+ return literal(literal, false);
+ }
+
+ static ASTNode literal(RexLiteral literal, boolean useTypeQualInLiteral) {
+ Object val = null;
+ int type = 0;
+ SqlTypeName sqlType = literal.getType().getSqlTypeName();
+
+ switch (sqlType) {
+ case BINARY:
+ ByteString bs = (ByteString) literal.getValue();
+ val = bs.byteAt(0);
+ type = HiveParser.BigintLiteral;
+ break;
+ case TINYINT:
+ if (useTypeQualInLiteral) {
+ val = literal.getValue3() + "Y";
+ } else {
+ val = literal.getValue3();
+ }
+ type = HiveParser.TinyintLiteral;
+ break;
+ case SMALLINT:
+ if (useTypeQualInLiteral) {
+ val = literal.getValue3() + "S";
+ } else {
+ val = literal.getValue3();
+ }
+ type = HiveParser.SmallintLiteral;
+ break;
+ case INTEGER:
+ val = literal.getValue3();
+ type = HiveParser.BigintLiteral;
+ break;
+ case BIGINT:
+ if (useTypeQualInLiteral) {
+ val = literal.getValue3() + "L";
+ } else {
+ val = literal.getValue3();
+ }
+ type = HiveParser.BigintLiteral;
+ break;
+ case DOUBLE:
+ val = literal.getValue3() + "D";
+ type = HiveParser.Number;
+ break;
+ case DECIMAL:
+ val = literal.getValue3() + "BD";
+ type = HiveParser.DecimalLiteral;
+ break;
+ case FLOAT:
+ case REAL:
+ val = literal.getValue3();
+ type = HiveParser.Number;
+ break;
+ case VARCHAR:
+ case CHAR:
+ val = literal.getValue3();
+ String escapedVal = BaseSemanticAnalyzer.escapeSQLString(String.valueOf(val));
+ type = HiveParser.StringLiteral;
+ val = "'" + escapedVal + "'";
+ break;
+ case BOOLEAN:
+ val = literal.getValue3();
+ type = ((Boolean) val).booleanValue() ? HiveParser.KW_TRUE : HiveParser.KW_FALSE;
+ break;
+ case DATE: {
+ val = literal.getValue();
+ type = HiveParser.TOK_DATELITERAL;
+ DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
+ val = df.format(((Calendar) val).getTime());
+ val = "'" + val + "'";
+ }
+ break;
+ case TIME:
+ case TIMESTAMP: {
+ val = literal.getValue();
+ type = HiveParser.TOK_TIMESTAMPLITERAL;
+ DateFormat df = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
+ val = df.format(((Calendar) val).getTime());
+ val = "'" + val + "'";
+ }
+ break;
+ case NULL:
+ type = HiveParser.TOK_NULL;
+ break;
+
+ default:
+ throw new RuntimeException("Unsupported Type: " + sqlType);
+ }
+
+ return (ASTNode) ParseDriver.adaptor.create(type, String.valueOf(val));
+ }
+
+ ASTNode curr;
+
+ ASTNode node() {
+ return curr;
+ }
+
+ ASTBuilder add(int tokenType, String text) {
+ ParseDriver.adaptor.addChild(curr, createAST(tokenType, text));
+ return this;
+ }
+
+ ASTBuilder add(ASTBuilder b) {
+ ParseDriver.adaptor.addChild(curr, b.curr);
+ return this;
+ }
+
+ ASTBuilder add(ASTNode n) {
+ if (n != null) {
+ ParseDriver.adaptor.addChild(curr, n);
+ }
+ return this;
+ }
+}
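
[Reviewer note] Since ASTBuilder is package-private, any usage example has to sit in the same package. Assuming the Hive ql classes on the classpath (and that HiveParser.EQUAL is the equality token, as in the current grammar), the fluent helpers above would assemble the WHERE clause for a predicate like t.c = 1 roughly as follows; this is a sketch, not part of the patch:

    package org.apache.hadoop.hive.ql.optimizer.calcite.translator;

    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.HiveParser;

    // Builds the parse tree for: WHERE t.c = 1
    class ASTBuilderUsageSketch {
      public static void main(String[] args) {
        ASTNode eq = ASTBuilder.construct(HiveParser.EQUAL, "=")
            .add(ASTBuilder.qualifiedName("t", "c")) // t.c
            .add(HiveParser.Number, "1")             // literal 1
            .node();
        System.out.println(ASTBuilder.where(eq).dump());
      }
    }
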
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
new file mode 100644
index 0000000..c02a65e
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -0,0 +1,668 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelVisitor;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.SemiJoin;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexFieldAccess;
+import org.apache.calcite.rex.RexFieldCollation;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.rex.RexWindow;
+import org.apache.calcite.rex.RexWindowBound;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.util.BitSets;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+
+import com.google.common.collect.Iterables;
+
+public class ASTConverter {
+ private static final Log LOG = LogFactory.getLog(ASTConverter.class);
+
+ private RelNode root;
+ private HiveAST hiveAST;
+ private RelNode from;
+ private Filter where;
+ private Aggregate groupBy;
+ private Filter having;
+ private Project select;
+ private Sort order;
+ private Sort limit;
+
+ private Schema schema;
+
+ private long derivedTableCount;
+
+ ASTConverter(RelNode root, long dtCounterInitVal) {
+ this.root = root;
+ hiveAST = new HiveAST();
+ this.derivedTableCount = dtCounterInitVal;
+ }
+
+ public static ASTNode convert(final RelNode relNode, List<FieldSchema> resultSchema)
+ throws CalciteSemanticException {
+ RelNode root = PlanModifierForASTConv.convertOpTree(relNode, resultSchema);
+ ASTConverter c = new ASTConverter(root, 0);
+ return c.convert();
+ }
+
+ private ASTNode convert() {
+ /*
+ * 1. Walk RelNode Graph; note from, where, gBy.. nodes.
+ */
+ new QBVisitor().go(root);
+
+ /*
+ * 2. convert from node.
+ */
+ QueryBlockInfo qb = convertSource(from);
+ schema = qb.schema;
+ hiveAST.from = ASTBuilder.construct(HiveParser.TOK_FROM, "TOK_FROM").add(qb.ast).node();
+
+ /*
+ * 3. convert filterNode
+ */
+ if (where != null) {
+ ASTNode cond = where.getCondition().accept(new RexVisitor(schema));
+ hiveAST.where = ASTBuilder.where(cond);
+ }
+
+ /*
+ * 4. GBy
+ */
+ if (groupBy != null) {
+ ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_GROUPBY, "TOK_GROUPBY");
+ for (int i : BitSets.toIter(groupBy.getGroupSet())) {
+ RexInputRef iRef = new RexInputRef(i, groupBy.getCluster().getTypeFactory()
+ .createSqlType(SqlTypeName.ANY));
+ b.add(iRef.accept(new RexVisitor(schema)));
+ }
+
+ if (!groupBy.getGroupSet().isEmpty())
+ hiveAST.groupBy = b.node();
+ schema = new Schema(schema, groupBy);
+ }
+
+ /*
+ * 5. Having
+ */
+ if (having != null) {
+ ASTNode cond = having.getCondition().accept(new RexVisitor(schema));
+ hiveAST.having = ASTBuilder.having(cond);
+ }
+
+ /*
+ * 6. Project
+ */
+ ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_SELECT, "TOK_SELECT");
+
+ if (select.getChildExps().isEmpty()) {
+ RexLiteral r = select.getCluster().getRexBuilder().makeExactLiteral(new BigDecimal(1));
+ ASTNode selectExpr = ASTBuilder.selectExpr(ASTBuilder.literal(r), "1");
+ b.add(selectExpr);
+ } else {
+ int i = 0;
+
+ for (RexNode r : select.getChildExps()) {
+ ASTNode selectExpr = ASTBuilder.selectExpr(r.accept(
+ new RexVisitor(schema, r instanceof RexLiteral)),
+ select.getRowType().getFieldNames().get(i++));
+ b.add(selectExpr);
+ }
+ }
+ hiveAST.select = b.node();
+
+ /*
+ * 7. Order by. A RelNode has no pointer to its parent, hence we need to go
+ * top down; but the OB at each block really belongs to its src/from. Hence
+ * the need to pass in the sort for each block from its parent.
+ */
+ convertOBToASTNode((HiveSort) order);
+
+ // 8. Limit
+ convertLimitToASTNode((HiveSort) limit);
+
+ return hiveAST.getAST();
+ }
+
+ private void convertLimitToASTNode(HiveSort limit) {
+ if (limit != null) {
+ HiveSort hiveLimit = (HiveSort) limit;
+ RexNode limitExpr = hiveLimit.getFetchExpr();
+ if (limitExpr != null) {
+ Object val = ((RexLiteral) limitExpr).getValue2();
+ hiveAST.limit = ASTBuilder.limit(val);
+ }
+ }
+ }
+
+ private void convertOBToASTNode(HiveSort order) {
+ if (order != null) {
+ HiveSort hiveSort = (HiveSort) order;
+ if (!hiveSort.getCollation().getFieldCollations().isEmpty()) {
+ // 1 Add order by token
+ ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY");
+
+ schema = new Schema((HiveSort) hiveSort);
+ Map<Integer, RexNode> obRefToCallMap = hiveSort.getInputRefToCallMap();
+ RexNode obExpr;
+ ASTNode astCol;
+ for (RelFieldCollation c : hiveSort.getCollation().getFieldCollations()) {
+
+ // 2 Add Direction token
+ ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder
+ .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder
+ .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC");
+
+ // 3 Convert OB expr (OB Expr is usually an input ref except for top
+ // level OB; top level OB will have RexCall kept in a map.)
+ obExpr = null;
+ if (obRefToCallMap != null)
+ obExpr = obRefToCallMap.get(c.getFieldIndex());
+
+ if (obExpr != null) {
+ astCol = obExpr.accept(new RexVisitor(schema));
+ } else {
+ ColumnInfo cI = schema.get(c.getFieldIndex());
+ /*
+ * The RowResolver setup for Select drops Table associations. So
+ * setup ASTNode on unqualified name.
+ */
+ astCol = ASTBuilder.unqualifiedName(cI.column);
+ }
+
+ // 4 buildup the ob expr AST
+ directionAST.addChild(astCol);
+ orderAst.addChild(directionAST);
+ }
+ hiveAST.order = orderAst;
+ }
+ }
+ }
+
+ private Schema getRowSchema(String tblAlias) {
+ return new Schema(select, tblAlias);
+ }
+
+ private QueryBlockInfo convertSource(RelNode r) {
+ Schema s;
+ ASTNode ast;
+
+ if (r instanceof TableScan) {
+ TableScan f = (TableScan) r;
+ s = new Schema(f);
+ ast = ASTBuilder.table(f);
+ } else if (r instanceof Join) {
+ Join join = (Join) r;
+ QueryBlockInfo left = convertSource(join.getLeft());
+ QueryBlockInfo right = convertSource(join.getRight());
+ s = new Schema(left.schema, right.schema);
+ ASTNode cond = join.getCondition().accept(new RexVisitor(s));
+ boolean semiJoin = join instanceof SemiJoin;
+ ast = ASTBuilder.join(left.ast, right.ast, join.getJoinType(), cond, semiJoin);
+ if (semiJoin)
+ s = left.schema;
+ } else if (r instanceof Union) {
+ RelNode leftInput = ((Union) r).getInput(0);
+ RelNode rightInput = ((Union) r).getInput(1);
+
+ ASTConverter leftConv = new ASTConverter(leftInput, this.derivedTableCount);
+ ASTConverter rightConv = new ASTConverter(rightInput, this.derivedTableCount);
+ ASTNode leftAST = leftConv.convert();
+ ASTNode rightAST = rightConv.convert();
+
+ ASTNode unionAST = getUnionAllAST(leftAST, rightAST);
+
+ String sqAlias = nextAlias();
+ ast = ASTBuilder.subQuery(unionAST, sqAlias);
+ s = new Schema((Union) r, sqAlias);
+ } else {
+ ASTConverter src = new ASTConverter(r, this.derivedTableCount);
+ ASTNode srcAST = src.convert();
+ String sqAlias = nextAlias();
+ s = src.getRowSchema(sqAlias);
+ ast = ASTBuilder.subQuery(srcAST, sqAlias);
+ }
+ return new QueryBlockInfo(s, ast);
+ }
+
+ class QBVisitor extends RelVisitor {
+
+ public void handle(Filter filter) {
+ RelNode child = filter.getInput();
+ if (child instanceof Aggregate && !((Aggregate) child).getGroupSet().isEmpty()) {
+ ASTConverter.this.having = filter;
+ } else {
+ ASTConverter.this.where = filter;
+ }
+ }
+
+ public void handle(Project project) {
+ if (ASTConverter.this.select == null) {
+ ASTConverter.this.select = project;
+ } else {
+ ASTConverter.this.from = project;
+ }
+ }
+
+ @Override
+ public void visit(RelNode node, int ordinal, RelNode parent) {
+
+ if (node instanceof TableScan) {
+ ASTConverter.this.from = node;
+ } else if (node instanceof Filter) {
+ handle((Filter) node);
+ } else if (node instanceof Project) {
+ handle((Project) node);
+ } else if (node instanceof Join) {
+ ASTConverter.this.from = node;
+ } else if (node instanceof Union) {
+ ASTConverter.this.from = node;
+ } else if (node instanceof Aggregate) {
+ ASTConverter.this.groupBy = (Aggregate) node;
+ } else if (node instanceof Sort) {
+ if (ASTConverter.this.select != null) {
+ ASTConverter.this.from = node;
+ } else {
+ Sort hiveSortRel = (Sort) node;
+ if (hiveSortRel.getCollation().getFieldCollations().isEmpty())
+ ASTConverter.this.limit = hiveSortRel;
+ else
+ ASTConverter.this.order = hiveSortRel;
+ }
+ }
+ /*
+ * once the source node is reached; stop traversal for this QB
+ */
+ if (ASTConverter.this.from == null) {
+ node.childrenAccept(this);
+ }
+ }
+
+ }
+
+ static class RexVisitor extends RexVisitorImpl<ASTNode> {
+
+ private final Schema schema;
+ private boolean useTypeQualInLiteral;
+
+ protected RexVisitor(Schema schema) {
+ this(schema, false);
+ }
+
+ protected RexVisitor(Schema schema, boolean useTypeQualInLiteral) {
+ super(true);
+ this.schema = schema;
+ this.useTypeQualInLiteral = useTypeQualInLiteral;
+ }
+
+ @Override
+ public ASTNode visitFieldAccess(RexFieldAccess fieldAccess) {
+ return ASTBuilder.construct(HiveParser.DOT, ".").add(super.visitFieldAccess(fieldAccess))
+ .add(HiveParser.Identifier, fieldAccess.getField().getName()).node();
+ }
+
+ @Override
+ public ASTNode visitInputRef(RexInputRef inputRef) {
+ ColumnInfo cI = schema.get(inputRef.getIndex());
+ if (cI.agg != null) {
+ return (ASTNode) ParseDriver.adaptor.dupTree(cI.agg);
+ }
+
+ if (cI.table == null || cI.table.isEmpty())
+ return ASTBuilder.unqualifiedName(cI.column);
+ else
+ return ASTBuilder.qualifiedName(cI.table, cI.column);
+
+ }
+
+ @Override
+ public ASTNode visitLiteral(RexLiteral literal) {
+ return ASTBuilder.literal(literal, useTypeQualInLiteral);
+ }
+
+ private ASTNode getPSpecAST(RexWindow window) {
+ ASTNode pSpecAst = null;
+
+ ASTNode dByAst = null;
+ if (window.partitionKeys != null && !window.partitionKeys.isEmpty()) {
+ dByAst = ASTBuilder.createAST(HiveParser.TOK_DISTRIBUTEBY, "TOK_DISTRIBUTEBY");
+ for (RexNode pk : window.partitionKeys) {
+ ASTNode astCol = pk.accept(this);
+ dByAst.addChild(astCol);
+ }
+ }
+
+ ASTNode oByAst = null;
+ if (window.orderKeys != null && !window.orderKeys.isEmpty()) {
+ oByAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY");
+ for (RexFieldCollation ok : window.orderKeys) {
+ ASTNode astNode = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder
+ .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder
+ .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC");
+ ASTNode astCol = ok.left.accept(this);
+ astNode.addChild(astCol);
+ oByAst.addChild(astNode);
+ }
+ }
+
+ if (dByAst != null || oByAst != null) {
+ pSpecAst = ASTBuilder.createAST(HiveParser.TOK_PARTITIONINGSPEC, "TOK_PARTITIONINGSPEC");
+ if (dByAst != null)
+ pSpecAst.addChild(dByAst);
+ if (oByAst != null)
+ pSpecAst.addChild(oByAst);
+ }
+
+ return pSpecAst;
+ }
+
+ private ASTNode getWindowBound(RexWindowBound wb) {
+ ASTNode wbAST = null;
+
+ if (wb.isCurrentRow()) {
+ wbAST = ASTBuilder.createAST(HiveParser.KW_CURRENT, "CURRENT");
+ } else {
+ if (wb.isPreceding())
+ wbAST = ASTBuilder.createAST(HiveParser.KW_PRECEDING, "PRECEDING");
+ else
+ wbAST = ASTBuilder.createAST(HiveParser.KW_FOLLOWING, "FOLLOWING");
+ if (wb.isUnbounded()) {
+ wbAST.addChild(ASTBuilder.createAST(HiveParser.KW_UNBOUNDED, "UNBOUNDED"));
+ } else {
+ ASTNode offset = wb.getOffset().accept(this);
+ wbAST.addChild(offset);
+ }
+ }
+
+ return wbAST;
+ }
+
+ private ASTNode getWindowRangeAST(RexWindow window) {
+ ASTNode wRangeAst = null;
+
+ ASTNode startAST = null;
+ RexWindowBound ub = window.getUpperBound();
+ if (ub != null) {
+ startAST = getWindowBound(ub);
+ }
+
+ ASTNode endAST = null;
+ RexWindowBound lb = window.getLowerBound();
+ if (lb != null) {
+ endAST = getWindowBound(lb);
+ }
+
+ if (startAST != null || endAST != null) {
+ // NOTE: in Hive AST Rows->Range(Physical) & Range -> Values (logical)
+ if (window.isRows())
+ wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWRANGE, "TOK_WINDOWRANGE");
+ else
+ wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWVALUES, "TOK_WINDOWVALUES");
+ if (startAST != null)
+ wRangeAst.addChild(startAST);
+ if (endAST != null)
+ wRangeAst.addChild(endAST);
+ }
+
+ return wRangeAst;
+ }
+
+ @Override
+ public ASTNode visitOver(RexOver over) {
+ if (!deep) {
+ return null;
+ }
+
+ // 1. Translate the UDAF
+ final ASTNode wUDAFAst = visitCall(over);
+
+ // 2. Add TOK_WINDOW as child of UDAF
+ ASTNode wSpec = ASTBuilder.createAST(HiveParser.TOK_WINDOWSPEC, "TOK_WINDOWSPEC");
+ wUDAFAst.addChild(wSpec);
+
+ // 3. Add Part Spec & Range Spec as child of TOK_WINDOW
+ final RexWindow window = over.getWindow();
+ final ASTNode wPSpecAst = getPSpecAST(window);
+ final ASTNode wRangeAst = getWindowRangeAST(window);
+ if (wPSpecAst != null)
+ wSpec.addChild(wPSpecAst);
+ if (wRangeAst != null)
+ wSpec.addChild(wRangeAst);
+
+ return wUDAFAst;
+ }
+
+ @Override
+ public ASTNode visitCall(RexCall call) {
+ if (!deep) {
+ return null;
+ }
+
+ SqlOperator op = call.getOperator();
+ List<ASTNode> astNodeLst = new LinkedList<ASTNode>();
+ if (op.kind == SqlKind.CAST) {
+ HiveToken ht = TypeConverter.hiveToken(call.getType());
+ ASTBuilder astBldr = ASTBuilder.construct(ht.type, ht.text);
+ if (ht.args != null) {
+ for (String castArg : ht.args)
+ astBldr.add(HiveParser.Identifier, castArg);
+ }
+ astNodeLst.add(astBldr.node());
+ }
+
+ for (RexNode operand : call.operands) {
+ astNodeLst.add(operand.accept(this));
+ }
+
+ if (isFlat(call))
+ return SqlFunctionConverter.buildAST(op, astNodeLst, 0);
+ else
+ return SqlFunctionConverter.buildAST(op, astNodeLst);
+ }
+ }
+
+ static class QueryBlockInfo {
+ Schema schema;
+ ASTNode ast;
+
+ public QueryBlockInfo(Schema schema, ASTNode ast) {
+ super();
+ this.schema = schema;
+ this.ast = ast;
+ }
+ }
+
+ /*
+ * represents the schema exposed by a QueryBlock.
+ */
+ static class Schema extends ArrayList<ColumnInfo> {
+
+ private static final long serialVersionUID = 1L;
+
+ Schema(TableScan scan) {
+ String tabName = ((RelOptHiveTable) scan.getTable()).getTableAlias();
+ for (RelDataTypeField field : scan.getRowType().getFieldList()) {
+ add(new ColumnInfo(tabName, field.getName()));
+ }
+ }
+
+ Schema(Project select, String alias) {
+ for (RelDataTypeField field : select.getRowType().getFieldList()) {
+ add(new ColumnInfo(alias, field.getName()));
+ }
+ }
+
+ Schema(Union unionRel, String alias) {
+ for (RelDataTypeField field : unionRel.getRowType().getFieldList()) {
+ add(new ColumnInfo(alias, field.getName()));
+ }
+ }
+
+ Schema(Schema left, Schema right) {
+ for (ColumnInfo cI : Iterables.concat(left, right)) {
+ add(cI);
+ }
+ }
+
+ Schema(Schema src, Aggregate gBy) {
+ for (int i : BitSets.toIter(gBy.getGroupSet())) {
+ ColumnInfo cI = src.get(i);
+ add(cI);
+ }
+ List<AggregateCall> aggs = gBy.getAggCallList();
+ for (AggregateCall agg : aggs) {
+ int argCount = agg.getArgList().size();
+ ASTBuilder b = agg.isDistinct() ? ASTBuilder.construct(HiveParser.TOK_FUNCTIONDI,
+ "TOK_FUNCTIONDI") : argCount == 0 ? ASTBuilder.construct(HiveParser.TOK_FUNCTIONSTAR,
+ "TOK_FUNCTIONSTAR") : ASTBuilder.construct(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
+ b.add(HiveParser.Identifier, agg.getAggregation().getName());
+ for (int i : agg.getArgList()) {
+ RexInputRef iRef = new RexInputRef(i, gBy.getCluster().getTypeFactory()
+ .createSqlType(SqlTypeName.ANY));
+ b.add(iRef.accept(new RexVisitor(src)));
+ }
+ add(new ColumnInfo(null, b.node()));
+ }
+ }
+
+ /**
+ * Assumption:
+ * 1. Project will always be child of Sort.
+ * 2. In Calcite every projection in Project is uniquely named
+ * (unambiguous) without using table qualifier (table name).
+ *
+ * @param order
+ * Hive Sort Node
+ * @return Schema
+ */
+ public Schema(HiveSort order) {
+ Project select = (Project) order.getInput();
+ for (String projName : select.getRowType().getFieldNames()) {
+ add(new ColumnInfo(null, projName));
+ }
+ }
+ }
+
+ /*
+ * represents Column information exposed by a QueryBlock.
+ */
+ static class ColumnInfo {
+ String table;
+ String column;
+ ASTNode agg;
+
+ ColumnInfo(String table, String column) {
+ super();
+ this.table = table;
+ this.column = column;
+ }
+
+ ColumnInfo(String table, ASTNode agg) {
+ super();
+ this.table = table;
+ this.agg = agg;
+ }
+
+ ColumnInfo(String alias, ColumnInfo srcCol) {
+ this.table = alias;
+ this.column = srcCol.column;
+ this.agg = srcCol.agg;
+ }
+ }
+
+ private String nextAlias() {
+ String tabAlias = String.format("$hdt$_%d", derivedTableCount);
+ derivedTableCount++;
+ return tabAlias;
+ }
+
+ static class HiveAST {
+
+ ASTNode from;
+ ASTNode where;
+ ASTNode groupBy;
+ ASTNode having;
+ ASTNode select;
+ ASTNode order;
+ ASTNode limit;
+
+ public ASTNode getAST() {
+ ASTBuilder b = ASTBuilder
+ .construct(HiveParser.TOK_QUERY, "TOK_QUERY")
+ .add(from)
+ .add(
+ ASTBuilder.construct(HiveParser.TOK_INSERT, "TOK_INSERT").add(ASTBuilder.destNode())
+ .add(select).add(where).add(groupBy).add(having).add(order).add(limit));
+ return b.node();
+ }
+ }
+
+ public ASTNode getUnionAllAST(ASTNode leftAST, ASTNode rightAST) {
+
+ ASTNode unionTokAST = ASTBuilder.construct(HiveParser.TOK_UNION, "TOK_UNION").add(leftAST)
+ .add(rightAST).node();
+
+ return unionTokAST;
+ }
+
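+  /*
+   * A RexCall is "flat" when Calcite has collapsed a chain of the same
+   * operator into one n-ary call, e.g. AND(p, q, r) instead of the nested
+   * AND(AND(p, q), r); only AND/OR calls with more than two operands qualify.
+   */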
+ public static boolean isFlat(RexCall call) {
+ boolean flat = false;
+ if (call.operands != null && call.operands.size() > 2) {
+ SqlOperator op = call.getOperator();
+ if (op.getKind() == SqlKind.AND || op.getKind() == SqlKind.OR) {
+ flat = true;
+ }
+ }
+
+ return flat;
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
new file mode 100644
index 0000000..cce65f1
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitorImpl;
+
+/*
+ * Converts a Calcite RexNode tree into an equivalent Hive ExprNodeDesc tree.
+ */
+public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> {
+
+ RelDataType rType;
+ String tabAlias;
+ boolean partitioningExpr;
+
+ public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr) {
+ super(true);
+    /*
+     * hb: 6/25/14 For now we only support expressions that contain only
+     * partition columns; there is no use case yet for generic expressions.
+     * To support generic expressions, the converter would need to know
+     * whether a column is a partition column and whether it is a virtual
+     * column.
+     */
+    assert partitioningExpr;
+ this.tabAlias = tabAlias;
+ this.rType = rType;
+ this.partitioningExpr = partitioningExpr;
+ }
+
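+  /*
+   * A RexInputRef is positional, so it is resolved against the input row type
+   * to recover the column name and Hive type for the ExprNodeColumnDesc.
+   */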
+ @Override
+ public ExprNodeDesc visitInputRef(RexInputRef inputRef) {
+ RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex());
+ return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), f.getName(), tabAlias,
+ partitioningExpr);
+ }
+
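+  /*
+   * Converts the operands first, then maps the Calcite operator to the
+   * corresponding Hive GenericUDF through SqlFunctionConverter.
+   */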
+ @Override
+ public ExprNodeDesc visitCall(RexCall call) {
+ ExprNodeGenericFuncDesc gfDesc = null;
+
+ if (!deep) {
+ return null;
+ }
+
+    List<ExprNodeDesc> args = new LinkedList<ExprNodeDesc>();
+
+ for (RexNode operand : call.operands) {
+ args.add(operand.accept(this));
+ }
+
+    // If the expression is flat (e.g. and[p,q,r,s] or or[p,q,r,s]), rebuild
+    // it as a left-deep chain of binary calls: and(and(and(p,q),r),s).
+ if (ASTConverter.isFlat(call)) {
+      ArrayList<ExprNodeDesc> tmpExprArgs = new ArrayList<ExprNodeDesc>();
+ tmpExprArgs.addAll(args.subList(0, 2));
+ gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()),
+ SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2), tmpExprArgs);
+ for (int i = 2; i < call.operands.size(); i++) {
+        tmpExprArgs = new ArrayList<ExprNodeDesc>();