();
+ // Add the transformation that computes the lineage information.
+ transformations.add(new Generator());
+ transformations.add(new PredicateTransitivePropagate());
+ transformations.add(new PredicatePushDown());
+ transformations.add(new PartitionPruner());
+ transformations.add(new PartitionConditionRemover());
+ transformations.add(new ColumnPruner());
+ transformations.add(new AnnotateWithStatistics());
+ }
+
+ /**
+ * Invoke all the transformations one-by-one, and alter the query plan.
+ *
+ * @return ParseContext
+ * @throws SemanticException
+ */
+ public ParseContext optimize() throws SemanticException {
+ for (Transform t : transformations) {
+ pctx = t.transform(pctx);
+ }
+ return pctx;
+ }
+
+ /**
+ * @return the pctx
+ */
+ public ParseContext getPctx() {
+ return pctx;
+ }
+
+ /**
+ * @param pctx
+ * the pctx to set
+ */
+ public void setPctx(ParseContext pctx) {
+ this.pctx = pctx;
+ }
+
+}
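The optimize() method above threads the ParseContext through each registered Transform in order, so every transformation sees the plan produced by the previous one. A stand-alone sketch of that chaining pattern, using hypothetical Transform/Context stand-ins rather than the real Hive classes:

    import java.util.ArrayList;
    import java.util.List;

    public class TransformChainSketch {
      // Stand-ins for Hive's Transform and ParseContext.
      interface Transform {
        Context transform(Context pctx);
      }
      static class Context {
        int applied;
      }

      public static void main(String[] args) {
        List<Transform> transformations = new ArrayList<Transform>();
        transformations.add(new Transform() {
          public Context transform(Context pctx) { pctx.applied++; return pctx; }
        });
        transformations.add(new Transform() {
          public Context transform(Context pctx) { pctx.applied++; return pctx; }
        });

        // Same shape as optimize(): each transform receives the context the previous one produced.
        Context pctx = new Context();
        for (Transform t : transformations) {
          pctx = t.transform(pctx);
        }
        System.out.println("transforms applied: " + pctx.applied); // 2
      }
    }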
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java
new file mode 100644
index 0000000..2c08772
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java
@@ -0,0 +1,27 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+import com.google.common.collect.ImmutableList;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdDistinctRowCount;
+import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdSelectivity;
+import org.eigenbase.rel.metadata.ChainedRelMetadataProvider;
+import org.eigenbase.rel.metadata.DefaultRelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMetadataProvider;
+
+/**
+ * Distinct row count and selectivity are overridden for Hive.
+ *
+ * Distinct Row Count is overridden for:
+ * 1) Join 2) TableScan.
+ * Selectivity is overridden for:
+ * 1) Join 2) TableScan & Filter.
+ */
+public class HiveDefaultRelMetadataProvider {
+ private HiveDefaultRelMetadataProvider() {
+ }
+
+ public static final RelMetadataProvider INSTANCE = ChainedRelMetadataProvider.of(ImmutableList
+ .of(HiveRelMdDistinctRowCount.SOURCE,
+ HiveRelMdSelectivity.SOURCE,
+ new DefaultRelMetadataProvider()));
+}
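ChainedRelMetadataProvider consults the providers in list order, so the Hive-specific handlers for distinct row count and selectivity shadow the defaults, and any request they do not handle falls through to DefaultRelMetadataProvider. A rough stand-alone sketch of that first-match-wins lookup, using a hypothetical handler interface rather than the eigenbase API:

    import java.util.Arrays;
    import java.util.List;

    public class ChainedProviderSketch {
      // Stand-in for a metadata provider: returns null when it cannot answer.
      interface RowCountHandler {
        Double rowCount(String relKind);
      }

      static Double firstNonNull(List<RowCountHandler> chain, String relKind) {
        for (RowCountHandler h : chain) {
          Double v = h.rowCount(relKind);
          if (v != null) {
            return v; // earlier providers in the chain win
          }
        }
        return null;
      }

      public static void main(String[] args) {
        RowCountHandler hiveOverride = new RowCountHandler() {
          public Double rowCount(String relKind) {
            return "JOIN".equals(relKind) ? 42.0 : null; // only handles joins
          }
        };
        RowCountHandler defaults = new RowCountHandler() {
          public Double rowCount(String relKind) {
            return 1000.0; // handles everything
          }
        };
        List<RowCountHandler> chain = Arrays.asList(hiveOverride, defaults);
        System.out.println(firstNonNull(chain, "JOIN"));   // 42.0, from the Hive override
        System.out.println(firstNonNull(chain, "FILTER")); // 1000.0, from the default
      }
    }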
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java
new file mode 100644
index 0000000..9faae39
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java
@@ -0,0 +1,200 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+
+import org.eigenbase.rel.RelFactories.ProjectFactory;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.sql.fun.SqlStdOperatorTable;
+import org.eigenbase.sql.validate.SqlValidatorUtil;
+import org.eigenbase.util.Pair;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+
+/**
+ * Generic utility functions needed for Optiq based Hive CBO.
+ */
+
+public class HiveOptiqUtil {
+
+ /**
+ * Get list of virtual columns from the given list of projections.
+ *
+ *
+ * @param exps
+ * list of rex nodes representing projections
+ * @return List of Virtual Columns, will not be null.
+ */
+ public static List<Integer> getVirtualCols(List<? extends RexNode> exps) {
+ List<Integer> vCols = new ArrayList<Integer>();
+
+ for (int i = 0; i < exps.size(); i++) {
+ if (!(exps.get(i) instanceof RexInputRef)) {
+ vCols.add(i);
+ }
+ }
+
+ return vCols;
+ }
+
+ public static List<RexNode> getProjsFromBelowAsInputRef(final RelNode rel) {
+ List<RexNode> projectList = Lists.transform(rel.getRowType().getFieldList(),
+ new Function<RelDataTypeField, RexNode>() {
+ @Override
+ public RexNode apply(RelDataTypeField field) {
+ return rel.getCluster().getRexBuilder().makeInputRef(field.getType(), field.getIndex());
+ }
+ });
+ return projectList;
+ }
+
+ public static List<Integer> translateBitSetToProjIndx(BitSet projBitSet) {
+ List<Integer> projIndxLst = new ArrayList<Integer>();
+
+ for (int i = 0; i < projBitSet.length(); i++) {
+ if (projBitSet.get(i)) {
+ projIndxLst.add(i);
+ }
+ }
+
+ return projIndxLst;
+ }
+
+ @Deprecated
+ public static void todo(String s) {
+ }
+
+ /**
+ * Push any equi-join conditions that are not simple column references as Projections
+ * on top of the join children.
+ *
+ * @param factory
+ * Project factory to use.
+ * @param inputRels
+ * inputs to a join
+ * @param leftJoinKeys
+ * expressions for LHS of join key
+ * @param rightJoinKeys
+ * expressions for RHS of join key
+ * @param systemColCount
+ * number of system columns, usually zero. These columns are
+ * projected at the leading edge of the output row.
+ * @param leftKeys on return this contains the join key positions from
+ * the new project rel on the LHS.
+ * @param rightKeys on return this contains the join key positions from
+ * the new project rel on the RHS.
+ * @return the join condition after the non-column equi expressions are pushed down.
+ */
+ public static RexNode projectNonColumnEquiConditions(ProjectFactory factory,
+ RelNode[] inputRels, List<RexNode> leftJoinKeys,
+ List<RexNode> rightJoinKeys, int systemColCount, List<Integer> leftKeys,
+ List<Integer> rightKeys) {
+ RelNode leftRel = inputRels[0];
+ RelNode rightRel = inputRels[1];
+ RexBuilder rexBuilder = leftRel.getCluster().getRexBuilder();
+ RexNode outJoinCond = null;
+
+ int origLeftInputSize = leftRel.getRowType().getFieldCount();
+ int origRightInputSize = rightRel.getRowType().getFieldCount();
+
+ List<RexNode> newLeftFields = new ArrayList<RexNode>();
+ List<String> newLeftFieldNames = new ArrayList<String>();
+
+ List<RexNode> newRightFields = new ArrayList<RexNode>();
+ List<String> newRightFieldNames = new ArrayList<String>();
+ int leftKeyCount = leftJoinKeys.size();
+ int i;
+
+ for (i = 0; i < origLeftInputSize; i++) {
+ final RelDataTypeField field = leftRel.getRowType().getFieldList().get(i);
+ newLeftFields.add(rexBuilder.makeInputRef(field.getType(), i));
+ newLeftFieldNames.add(field.getName());
+ }
+
+ for (i = 0; i < origRightInputSize; i++) {
+ final RelDataTypeField field = rightRel.getRowType().getFieldList()
+ .get(i);
+ newRightFields.add(rexBuilder.makeInputRef(field.getType(), i));
+ newRightFieldNames.add(field.getName());
+ }
+
+ int newKeyCount = 0;
+ List<Pair<Integer, Integer>> origColEqConds = new ArrayList<Pair<Integer, Integer>>();
+ for (i = 0; i < leftKeyCount; i++) {
+ RexNode leftKey = leftJoinKeys.get(i);
+ RexNode rightKey = rightJoinKeys.get(i);
+
+ if (leftKey instanceof RexInputRef && rightKey instanceof RexInputRef) {
+ origColEqConds.add(Pair.of(((RexInputRef) leftKey).getIndex(),
+ ((RexInputRef) rightKey).getIndex()));
+ } else {
+ newLeftFields.add(leftKey);
+ newLeftFieldNames.add(null);
+ newRightFields.add(rightKey);
+ newRightFieldNames.add(null);
+ newKeyCount++;
+ }
+ }
+
+ for (i = 0; i < origColEqConds.size(); i++) {
+ Pair<Integer, Integer> p = origColEqConds.get(i);
+ RexNode leftKey = leftJoinKeys.get(i);
+ RexNode rightKey = rightJoinKeys.get(i);
+ leftKeys.add(p.left);
+ rightKeys.add(p.right);
+ RexNode cond = rexBuilder.makeCall(
+ SqlStdOperatorTable.EQUALS,
+ rexBuilder.makeInputRef(leftKey.getType(), systemColCount + p.left),
+ rexBuilder.makeInputRef(rightKey.getType(), systemColCount
+ + origLeftInputSize + newKeyCount + p.right));
+ if (outJoinCond == null) {
+ outJoinCond = cond;
+ } else {
+ outJoinCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, outJoinCond,
+ cond);
+ }
+ }
+
+ if (newKeyCount == 0) {
+ return outJoinCond;
+ }
+
+ int newLeftOffset = systemColCount + origLeftInputSize;
+ int newRightOffset = systemColCount + origLeftInputSize
+ + origRightInputSize + newKeyCount;
+ for (i = 0; i < newKeyCount; i++) {
+ leftKeys.add(origLeftInputSize + i);
+ rightKeys.add(origRightInputSize + i);
+ RexNode cond = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, rexBuilder
+ .makeInputRef(newLeftFields.get(i).getType(), newLeftOffset + i),
+ rexBuilder.makeInputRef(newLeftFields.get(i).getType(),
+ newRightOffset + i));
+ if (outJoinCond == null) {
+ outJoinCond = cond;
+ } else {
+ outJoinCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, outJoinCond,
+ cond);
+ }
+ }
+
+ // add a Project on top of the inputs if new join keys must be produced
+ // beyond the original input fields
+ if (newKeyCount > 0) {
+ leftRel = factory.createProject(leftRel, newLeftFields,
+ SqlValidatorUtil.uniquify(newLeftFieldNames));
+ rightRel = factory.createProject(rightRel, newRightFields,
+ SqlValidatorUtil.uniquify(newRightFieldNames));
+ }
+
+ inputRels[0] = leftRel;
+ inputRels[1] = rightRel;
+
+ return outJoinCond;
+ }
+}
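To illustrate what projectNonColumnEquiConditions does: for a condition such as upper(l.name) = r.id, the computed key upper(l.name) is first emitted as an extra projected column below the join, and the join condition is rewritten as a plain column-to-column equality on that new column. A stand-alone sketch of the key-classification step, with plain strings standing in for RexNodes (a "$n" string marks a direct column reference; everything here is hypothetical illustration data):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class EquiKeySplitSketch {
      public static void main(String[] args) {
        List<String> leftKeys = Arrays.asList("$0", "upper($1)");
        List<String> rightKeys = Arrays.asList("$0", "$2");

        List<String> extraLeftProjections = new ArrayList<String>();
        List<String> rewrittenConditions = new ArrayList<String>();

        for (int i = 0; i < leftKeys.size(); i++) {
          String l = leftKeys.get(i);
          String r = rightKeys.get(i);
          if (l.startsWith("$") && r.startsWith("$")) {
            // Already a column-to-column equality; keep as-is.
            rewrittenConditions.add(l + " = " + r);
          } else {
            // Project the computed expression below the join, then join on the new column.
            extraLeftProjections.add(l);
            String newCol = "$proj" + (extraLeftProjections.size() - 1);
            rewrittenConditions.add(newCol + " = " + r);
          }
        }
        System.out.println("extra projections on left: " + extraLeftProjections);
        System.out.println("rewritten join condition: " + String.join(" AND ", rewrittenConditions));
      }
    }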
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/JoinUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/JoinUtil.java
new file mode 100644
index 0000000..da77d36
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/JoinUtil.java
@@ -0,0 +1,295 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.relopt.RelOptUtil.InputReferencedVisitor;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.sql.SqlKind;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+
+/**
+ * Utility for inspecting Join Conditions.
+ *
+ * Main Elements:
+ * 1. JoinPredicateInfo - represents Join Condition.
+ * 2. JoinLeafPredicateInfo - represents leaf predicates within the join condition.
+ *
+ * TODO: Move this to Optiq Framework
+ */
+public class JoinUtil {
+
+ /**
+ * JoinPredicateInfo represents Join condition; JoinPredicate Info uses
+ * JoinLeafPredicateInfo to represent individual conjunctive elements in the
+ * predicate.
+ * JoinPredicateInfo = JoinLeafPredicateInfo1 and JoinLeafPredicateInfo2...
+ *
+ * JoinPredicateInfo:
+ * 1. Preserves the order of conjunctive elements for
+ * equi-joins (m_equiJoinPredicateElements).
+ * 2. Stores the set of projection indexes from the left and right children that are
+ * part of the equi-join keys; the indexes are kept in both the child and the Join node schema.
+ * 3. Keeps a map from the projection indexes that are part of join keys to the list of
+ * conjunctive elements (JoinLeafPredicateInfo) that use them.
+ *
+ */
+ public static class JoinPredicateInfo {
+ private final ImmutableList<JoinLeafPredicateInfo> m_nonEquiJoinPredicateElements;
+ private final ImmutableList<JoinLeafPredicateInfo> m_equiJoinPredicateElements;
+ private final ImmutableSet<Integer> m_projsFromLeftPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> m_projsFromRightPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> m_projsFromRightPartOfJoinKeysInJoinSchema;
+ private final ImmutableMap<Integer, ImmutableList<JoinLeafPredicateInfo>> m_mapOfProjIndxInJoinSchemaToLeafPInfo;
+
+ public JoinPredicateInfo(List<JoinLeafPredicateInfo> nonEquiJoinPredicateElements,
+ List<JoinLeafPredicateInfo> equiJoinPredicateElements,
+ Set<Integer> projsFromLeftPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema,
+ Map<Integer, ImmutableList<JoinLeafPredicateInfo>> mapOfProjIndxInJoinSchemaToLeafPInfo) {
+ m_nonEquiJoinPredicateElements = ImmutableList.copyOf(nonEquiJoinPredicateElements);
+ m_equiJoinPredicateElements = ImmutableList.copyOf(equiJoinPredicateElements);
+ m_projsFromLeftPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromLeftPartOfJoinKeysInChildSchema);
+ m_projsFromRightPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInChildSchema);
+ m_projsFromRightPartOfJoinKeysInJoinSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInJoinSchema);
+ m_mapOfProjIndxInJoinSchemaToLeafPInfo = ImmutableMap
+ .copyOf(mapOfProjIndxInJoinSchemaToLeafPInfo);
+ }
+
+ public List<JoinLeafPredicateInfo> getNonEquiJoinPredicateElements() {
+ return m_nonEquiJoinPredicateElements;
+ }
+
+ public List<JoinLeafPredicateInfo> getEquiJoinPredicateElements() {
+ return m_equiJoinPredicateElements;
+ }
+
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInChildSchema() {
+ return m_projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInChildSchema() {
+ return m_projsFromRightPartOfJoinKeysInChildSchema;
+ }
+
+ /**
+ * NOTE: Join Schema = left Schema + (right Schema offset by
+ * left.fieldcount). Hence it is OK to return projections from the left in the
+ * child schema.
+ */
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInJoinSchema() {
+ return m_projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInJoinSchema() {
+ return m_projsFromRightPartOfJoinKeysInJoinSchema;
+ }
+
+ public Map<Integer, ImmutableList<JoinLeafPredicateInfo>> getMapOfProjIndxToLeafPInfo() {
+ return m_mapOfProjIndxInJoinSchemaToLeafPInfo;
+ }
+
+ public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoinRel j) {
+ return constructJoinPredicateInfo(j, j.getCondition());
+ }
+
+ public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoinRel j, RexNode predicate) {
+ JoinPredicateInfo jpi = null;
+ JoinLeafPredicateInfo jlpi = null;
+ List<JoinLeafPredicateInfo> equiLPIList = new ArrayList<JoinLeafPredicateInfo>();
+ List<JoinLeafPredicateInfo> nonEquiLPIList = new ArrayList<JoinLeafPredicateInfo>();
+ Set<Integer> projsFromLeftPartOfJoinKeys = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeys = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema = new HashSet<Integer>();
+ Map<Integer, List<JoinLeafPredicateInfo>> tmpMapOfProjIndxInJoinSchemaToLeafPInfo = new HashMap<Integer, List<JoinLeafPredicateInfo>>();
+ Map<Integer, ImmutableList<JoinLeafPredicateInfo>> mapOfProjIndxInJoinSchemaToLeafPInfo = new HashMap<Integer, ImmutableList<JoinLeafPredicateInfo>>();
+ List<JoinLeafPredicateInfo> tmpJLPILst = null;
+ int rightOffSet = j.getLeft().getRowType().getFieldCount();
+ int projIndxInJoin;
+ List<RexNode> conjunctiveElements;
+
+ todo("Move this to Optiq");
+
+ // 1. Decompose Join condition to a number of leaf predicates
+ // (conjunctive elements)
+ conjunctiveElements = RelOptUtil.conjunctions(predicate);
+
+ // 2. Walk through leaf predicates building up JoinLeafPredicateInfo
+ for (RexNode ce : conjunctiveElements) {
+ // 2.1 Construct JoinLeafPredicateInfo
+ jlpi = JoinLeafPredicateInfo.constructJoinLeafPredicateInfo(j, ce);
+
+ // 2.2 Classify leaf predicate as Equi vs Non Equi
+ if (jlpi.m_comparisonType.equals(SqlKind.EQUALS)) {
+ equiLPIList.add(jlpi);
+ } else {
+ nonEquiLPIList.add(jlpi);
+ }
+
+ // 2.3 Maintain join keys coming from left vs right (in child &
+ // Join Schema)
+ projsFromLeftPartOfJoinKeys.addAll(jlpi.getProjsFromLeftPartOfJoinKeysInChildSchema());
+ projsFromRightPartOfJoinKeys.addAll(jlpi.getProjsFromRightPartOfJoinKeysInChildSchema());
+ projsFromRightPartOfJoinKeysInJoinSchema.addAll(jlpi
+ .getProjsFromRightPartOfJoinKeysInJoinSchema());
+
+ // 2.4 Update Join Key to JoinLeafPredicateInfo map with keys
+ // from left
+ for (Integer projIndx : jlpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
+ tmpJLPILst = tmpMapOfProjIndxInJoinSchemaToLeafPInfo.get(projIndx);
+ if (tmpJLPILst == null)
+ tmpJLPILst = new ArrayList<JoinLeafPredicateInfo>();
+ tmpJLPILst.add(jlpi);
+ tmpMapOfProjIndxInJoinSchemaToLeafPInfo.put(projIndx, tmpJLPILst);
+ }
+
+ // 2.5 Update Join Key to JoinLeafPredicateInfo map with keys
+ // from right
+ for (Integer projIndx : jlpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
+ projIndxInJoin = projIndx + rightOffSet;
+ tmpJLPILst = tmpMapOfProjIndxInJoinSchemaToLeafPInfo.get(projIndxInJoin);
+ if (tmpJLPILst == null)
+ tmpJLPILst = new ArrayList<JoinLeafPredicateInfo>();
+ tmpJLPILst.add(jlpi);
+ tmpMapOfProjIndxInJoinSchemaToLeafPInfo.put(projIndxInJoin, tmpJLPILst);
+ }
+
+ }
+
+ // 3. Update the Join Key to List<JoinLeafPredicateInfo> map to use
+ // ImmutableList
+ for (Entry<Integer, List<JoinLeafPredicateInfo>> e : tmpMapOfProjIndxInJoinSchemaToLeafPInfo
+ .entrySet()) {
+ mapOfProjIndxInJoinSchemaToLeafPInfo.put(e.getKey(), ImmutableList.copyOf(e.getValue()));
+ }
+
+ // 4. Construct JoinPredicateInfo
+ jpi = new JoinPredicateInfo(nonEquiLPIList, equiLPIList, projsFromLeftPartOfJoinKeys,
+ projsFromRightPartOfJoinKeys, projsFromRightPartOfJoinKeysInJoinSchema,
+ mapOfProjIndxInJoinSchemaToLeafPInfo);
+ return jpi;
+ }
+ }
+
+ /**
+ * JoinLeafPredicateInfo represents a leaf predicate in a Join condition
+ * (conjunctive element).
+ *
+ * JoinLeafPredicateInfo:
+ * 1. Stores the lists of expressions from the left and right children that are part of
+ * the equi-join keys.
+ * 2. Stores the sets of projection indexes from the left and right children that are
+ * part of the equi-join keys; the indexes are kept in both the child and the Join node schema.
+ */
+ public static class JoinLeafPredicateInfo {
+ private final SqlKind m_comparisonType;
+ private final ImmutableList<RexNode> m_joinKeyExprsFromLeft;
+ private final ImmutableList<RexNode> m_joinKeyExprsFromRight;
+ private final ImmutableSet<Integer> m_projsFromLeftPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> m_projsFromRightPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> m_projsFromRightPartOfJoinKeysInJoinSchema;
+
+ public JoinLeafPredicateInfo(SqlKind comparisonType, List<RexNode> joinKeyExprsFromLeft,
+ List<RexNode> joinKeyExprsFromRight, Set<Integer> projsFromLeftPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema) {
+ m_comparisonType = comparisonType;
+ m_joinKeyExprsFromLeft = ImmutableList.copyOf(joinKeyExprsFromLeft);
+ m_joinKeyExprsFromRight = ImmutableList.copyOf(joinKeyExprsFromRight);
+ m_projsFromLeftPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromLeftPartOfJoinKeysInChildSchema);
+ m_projsFromRightPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInChildSchema);
+ m_projsFromRightPartOfJoinKeysInJoinSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInJoinSchema);
+ }
+
+ public List<RexNode> getJoinKeyExprsFromLeft() {
+ return m_joinKeyExprsFromLeft;
+ }
+
+ public List<RexNode> getJoinKeyExprsFromRight() {
+ return m_joinKeyExprsFromRight;
+ }
+
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInChildSchema() {
+ return m_projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ /**
+ * NOTE: Join Schema = left Schema + (right Schema offset by
+ * left.fieldcount). Hence it is OK to return projections from the left in the
+ * child schema.
+ */
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInJoinSchema() {
+ return m_projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInChildSchema() {
+ return m_projsFromRightPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInJoinSchema() {
+ return m_projsFromRightPartOfJoinKeysInJoinSchema;
+ }
+
+ public static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(HiveJoinRel j, RexNode pe) {
+ JoinLeafPredicateInfo jlpi = null;
+ List<Integer> filterNulls = new ArrayList<Integer>();
+ List<RexNode> joinKeyExprsFromLeft = new ArrayList<RexNode>();
+ List<RexNode> joinKeyExprsFromRight = new ArrayList<RexNode>();
+ Set<Integer> projsFromLeftPartOfJoinKeysInChildSchema = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeysInChildSchema = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema = new HashSet<Integer>();
+ int rightOffSet = j.getLeft().getRowType().getFieldCount();
+
+ todo("Move this to Optiq");
+
+ // 1. Split leaf join predicate to expressions from left, right
+ @SuppressWarnings("unused")
+ RexNode nonEquiPredicate = RelOptUtil.splitJoinCondition(j.getSystemFieldList(), j.getLeft(),
+ j.getRight(), pe, joinKeyExprsFromLeft, joinKeyExprsFromRight, filterNulls, null);
+
+ // 2. For left expressions, collect child projection indexes used
+ InputReferencedVisitor irvLeft = new InputReferencedVisitor();
+ irvLeft.apply(joinKeyExprsFromLeft);
+ projsFromLeftPartOfJoinKeysInChildSchema.addAll(irvLeft.inputPosReferenced);
+
+ // 3. For right expressions, collect child projection indexes used
+ InputReferencedVisitor irvRight = new InputReferencedVisitor();
+ irvRight.apply(joinKeyExprsFromRight);
+ projsFromRightPartOfJoinKeysInChildSchema.addAll(irvRight.inputPosReferenced);
+
+ // 4. Translate projection indexes from the right side to the join schema by
+ // adding the offset.
+ for (Integer indx : projsFromRightPartOfJoinKeysInChildSchema) {
+ projsFromRightPartOfJoinKeysInJoinSchema.add(indx + rightOffSet);
+ }
+
+ // 5. Construct JoinLeafPredicateInfo
+ jlpi = new JoinLeafPredicateInfo(pe.getKind(), joinKeyExprsFromLeft, joinKeyExprsFromRight,
+ projsFromLeftPartOfJoinKeysInChildSchema, projsFromRightPartOfJoinKeysInChildSchema,
+ projsFromRightPartOfJoinKeysInJoinSchema);
+
+ return jlpi;
+ }
+ }
+
+ @Deprecated
+ public static void todo(String s) {
+ }
+}
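The bookkeeping above relies on the Join output schema being the left child's fields followed by the right child's fields, so a projection index from the right child maps into the join schema by adding the left field count. A small stand-alone illustration of that offset, with hypothetical field counts instead of the eigenbase types:

    import java.util.LinkedHashSet;
    import java.util.Set;

    public class JoinSchemaOffsetSketch {
      public static void main(String[] args) {
        int leftFieldCount = 3;                       // left child columns occupy join indexes 0..2
        Set<Integer> rightKeysInChildSchema = new LinkedHashSet<Integer>();
        rightKeysInChildSchema.add(0);
        rightKeysInChildSchema.add(2);

        Set<Integer> rightKeysInJoinSchema = new LinkedHashSet<Integer>();
        for (Integer indx : rightKeysInChildSchema) {
          rightKeysInJoinSchema.add(indx + leftFieldCount); // shift past the left child's fields
        }
        System.out.println(rightKeysInJoinSchema);    // [3, 5]
      }
    }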
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/Pair.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/Pair.java
new file mode 100644
index 0000000..c923340
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/Pair.java
@@ -0,0 +1,19 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+ public class Pair<T1, T2> {
+ private final T1 m_first;
+ private final T2 m_second;
+
+ public Pair(T1 first, T2 second) {
+ m_first = first;
+ m_second = second;
+ }
+
+ public T1 getFirst() {
+ return m_first;
+ }
+
+ public T2 getSecond() {
+ return m_second;
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
new file mode 100644
index 0000000..d3e517d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
@@ -0,0 +1,321 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ExprNodeConverter;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.TableAccessRel;
+import org.eigenbase.relopt.RelOptAbstractTable;
+import org.eigenbase.relopt.RelOptSchema;
+import org.eigenbase.relopt.RelOptUtil.InputFinder;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.rex.RexNode;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMap.Builder;
+
+public class RelOptHiveTable extends RelOptAbstractTable {
+ private final Table m_hiveTblMetadata;
+ private final ImmutableList<ColumnInfo> m_hiveNonPartitionCols;
+ private final ImmutableMap<Integer, ColumnInfo> m_hiveNonPartitionColsMap;
+ private final ImmutableMap<Integer, ColumnInfo> m_hivePartitionColsMap;
+ private final int m_noOfProjs;
+ final HiveConf m_hiveConf;
+
+ private double m_rowCount = -1;
+ Map<Integer, ColStatistics> m_hiveColStatsMap = new HashMap<Integer, ColStatistics>();
+ private Integer m_numPartitions;
+ PrunedPartitionList partitionList;
+ Map<String, PrunedPartitionList> partitionCache;
+ AtomicInteger noColsMissingStats;
+
+ protected static final Log LOG = LogFactory
+ .getLog(RelOptHiveTable.class
+ .getName());
+
+ public RelOptHiveTable(RelOptSchema optiqSchema, String name, RelDataType rowType,
+ Table hiveTblMetadata, List<ColumnInfo> hiveNonPartitionCols,
+ List<ColumnInfo> hivePartitionCols, HiveConf hconf, Map<String, PrunedPartitionList> partitionCache, AtomicInteger noColsMissingStats) {
+ super(optiqSchema, name, rowType);
+ m_hiveTblMetadata = hiveTblMetadata;
+ m_hiveNonPartitionCols = ImmutableList.copyOf(hiveNonPartitionCols);
+ m_hiveNonPartitionColsMap = getColInfoMap(hiveNonPartitionCols, 0);
+ m_hivePartitionColsMap = getColInfoMap(hivePartitionCols, m_hiveNonPartitionColsMap.size());
+ m_noOfProjs = hiveNonPartitionCols.size() + hivePartitionCols.size();
+ m_hiveConf = hconf;
+ this.partitionCache = partitionCache;
+ this.noColsMissingStats = noColsMissingStats;
+ }
+
+ private static ImmutableMap<Integer, ColumnInfo> getColInfoMap(List<ColumnInfo> hiveCols,
+ int startIndx) {
+ Builder<Integer, ColumnInfo> bldr = ImmutableMap.<Integer, ColumnInfo> builder();
+
+ int indx = startIndx;
+ for (ColumnInfo ci : hiveCols) {
+ bldr.put(indx, ci);
+ indx++;
+ }
+
+ return bldr.build();
+ }
+
+ @Override
+ public boolean isKey(BitSet arg0) {
+ return false;
+ }
+
+ @Override
+ public RelNode toRel(ToRelContext context) {
+ return new TableAccessRel(context.getCluster(), this);
+ }
+
+ @Override
+ public <T> T unwrap(Class<T> arg0) {
+ return arg0.isInstance(this) ? arg0.cast(this) : null;
+ }
+
+ @Override
+ public double getRowCount() {
+ if (m_rowCount == -1) {
+ if (null == partitionList) {
+ // we get here either for an unpartitioned table or for a partitioned table with no predicates
+ computePartitionList(m_hiveConf, null);
+ }
+ if (m_hiveTblMetadata.isPartitioned()) {
+ List<Long> rowCounts = StatsUtils.getBasicStatForPartitions(
+ m_hiveTblMetadata, partitionList.getNotDeniedPartns(),
+ StatsSetupConst.ROW_COUNT);
+ m_rowCount = StatsUtils.getSumIgnoreNegatives(rowCounts);
+
+ } else {
+ m_rowCount = StatsUtils.getNumRows(m_hiveTblMetadata);
+ }
+ }
+
+ return m_rowCount;
+ }
+
+ public Table getHiveTableMD() {
+ return m_hiveTblMetadata;
+ }
+
+ private String getColNamesForLogging(Set<String> colLst) {
+ StringBuffer sb = new StringBuffer();
+ boolean firstEntry = true;
+ for (String colName : colLst) {
+ if (firstEntry) {
+ sb.append(colName);
+ firstEntry = false;
+ } else {
+ sb.append(", " + colName);
+ }
+ }
+ return sb.toString();
+ }
+
+ public void computePartitionList(HiveConf conf, RexNode pruneNode) {
+
+ try {
+ if (!m_hiveTblMetadata.isPartitioned() || pruneNode == null || InputFinder.bits(pruneNode).length() == 0 ) {
+ // there is no predicate on partitioning column, we need all partitions in this case.
+ partitionList = PartitionPruner.prune(m_hiveTblMetadata, null, conf, getName(), partitionCache);
+ return;
+ }
+
+ // We have valid pruning expressions, only retrieve qualifying partitions
+ ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true));
+
+ partitionList = PartitionPruner.prune(m_hiveTblMetadata, pruneExpr, conf, getName(), partitionCache);
+ } catch (HiveException he) {
+ throw new RuntimeException(he);
+ }
+ }
+
+ private void updateColStats(Set<Integer> projIndxLst) {
+ List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
+ List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
+ List<String> partColNamesThatRqrStats = new ArrayList<String>();
+ List<Integer> partColIndxsThatRqrStats = new ArrayList<Integer>();
+ Set<String> colNamesFailedStats = new HashSet<String>();
+
+ // 1. Separate required columns to Non Partition and Partition Cols
+ ColumnInfo tmp;
+ for (Integer pi : projIndxLst) {
+ if (m_hiveColStatsMap.get(pi) == null) {
+ if ((tmp = m_hiveNonPartitionColsMap.get(pi)) != null) {
+ nonPartColNamesThatRqrStats.add(tmp.getInternalName());
+ nonPartColIndxsThatRqrStats.add(pi);
+ } else if ((tmp = m_hivePartitionColsMap.get(pi)) != null) {
+ partColNamesThatRqrStats.add(tmp.getInternalName());
+ partColIndxsThatRqrStats.add(pi);
+ } else {
+ String logMsg = "Unable to find Column Index: " + pi + ", in "
+ + m_hiveTblMetadata.getCompleteName();
+ LOG.error(logMsg);
+ throw new RuntimeException(logMsg);
+ }
+ }
+ }
+
+ if (null == partitionList) {
+ // We could be here either because it is an unpartitioned table or because
+ // there are no pruning predicates on a partitioned table.
+ computePartitionList(m_hiveConf, null);
+ }
+
+ // 2. Obtain Col Stats for Non Partition Cols
+ if (nonPartColNamesThatRqrStats.size() > 0) {
+ List<ColStatistics> hiveColStats;
+
+ if (!m_hiveTblMetadata.isPartitioned()) {
+ // 2.1 Handle the case for unpartitioned table.
+ hiveColStats = StatsUtils.getTableColumnStats(m_hiveTblMetadata, m_hiveNonPartitionCols,
+ nonPartColNamesThatRqrStats);
+
+ // 2.1.1 Record Column Names that we needed stats for but couldn't
+ if (hiveColStats == null) {
+ colNamesFailedStats.addAll(nonPartColNamesThatRqrStats);
+ } else if (hiveColStats.size() != nonPartColNamesThatRqrStats.size()) {
+ Set<String> setOfFailedCols = new HashSet<String>(nonPartColNamesThatRqrStats);
+
+ Set<String> setOfObtainedColStats = new HashSet<String>();
+ for (ColStatistics cs : hiveColStats) {
+ setOfObtainedColStats.add(cs.getColumnName());
+ }
+ setOfFailedCols.removeAll(setOfObtainedColStats);
+
+ colNamesFailedStats.addAll(setOfFailedCols);
+ }
+ } else {
+ // 2.2 Obtain col stats for partitioned table.
+ try {
+ if (partitionList.getNotDeniedPartns().isEmpty()) {
+ // no need to make a metastore call
+ m_rowCount = 0;
+ hiveColStats = new ArrayList<ColStatistics>();
+ for (String c : nonPartColNamesThatRqrStats) {
+ // add empty stats object for each column
+ hiveColStats.add(new ColStatistics(m_hiveTblMetadata.getTableName(), c, null));
+ }
+ colNamesFailedStats.clear();
+ } else {
+ Statistics stats = StatsUtils.collectStatistics(m_hiveConf, partitionList,
+ m_hiveTblMetadata, m_hiveNonPartitionCols, nonPartColNamesThatRqrStats, true, true);
+ m_rowCount = stats.getNumRows();
+ hiveColStats = new ArrayList<ColStatistics>();
+ for (String c : nonPartColNamesThatRqrStats) {
+ ColStatistics cs = stats.getColumnStatisticsFromColName(c);
+ if (cs != null) {
+ hiveColStats.add(cs);
+ } else {
+ colNamesFailedStats.add(c);
+ }
+ }
+ }
+ } catch (HiveException e) {
+ String logMsg = "Collecting stats failed.";
+ LOG.error(logMsg);
+ throw new RuntimeException(logMsg);
+ }
+ }
+
+ if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) {
+ for (int i = 0; i < hiveColStats.size(); i++) {
+ m_hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), hiveColStats.get(i));
+ }
+ }
+ }
+
+ // 3. Obtain Stats for Partition Cols
+ if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) {
+ m_numPartitions = partitionList.getPartitions().size();
+ ColStatistics cStats = null;
+ for (int i = 0; i < partColNamesThatRqrStats.size(); i++) {
+ cStats = new ColStatistics(m_hiveTblMetadata.getTableName(),
+ partColNamesThatRqrStats.get(i), m_hivePartitionColsMap.get(
+ partColIndxsThatRqrStats.get(i)).getTypeName());
+ cStats.setCountDistint(getDistinctCount(partitionList.getPartitions(),partColNamesThatRqrStats.get(i)));
+ m_hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats);
+ }
+ }
+
+ // 4. Warn the user if we could not get stats for all the required columns
+ if (!colNamesFailedStats.isEmpty()) {
+ String logMsg = "No Stats for " + m_hiveTblMetadata.getCompleteName() + ", Columns: "
+ + getColNamesForLogging(colNamesFailedStats);
+ LOG.error(logMsg);
+ noColsMissingStats.getAndAdd(colNamesFailedStats.size());
+ throw new RuntimeException(logMsg);
+ }
+ }
+
+ private int getDistinctCount(Set<Partition> partitions, String partColName) {
+ Set<String> distinctVals = new HashSet<String>(partitions.size());
+ for (Partition partition : partitions) {
+ distinctVals.add(partition.getSpec().get(partColName));
+ }
+ return distinctVals.size();
+ }
+
+ public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
+ ImmutableList.Builder<ColStatistics> colStatsBldr = ImmutableList.<ColStatistics> builder();
+
+ if (projIndxLst != null) {
+ updateColStats(new HashSet<Integer>(projIndxLst));
+ for (Integer i : projIndxLst) {
+ colStatsBldr.add(m_hiveColStatsMap.get(i));
+ }
+ } else {
+ List<Integer> pILst = new ArrayList<Integer>();
+ for (Integer i = 0; i < m_noOfProjs; i++) {
+ pILst.add(i);
+ }
+ updateColStats(new HashSet<Integer>(pILst));
+ for (Integer pi : pILst) {
+ colStatsBldr.add(m_hiveColStatsMap.get(pi));
+ }
+ }
+
+ return colStatsBldr.build();
+ }
+
+ /*
+ * Check whether a set of columns consists only of partition columns:
+ * returns true only if every column in the BitSet is a partition
+ * column.
+ */
+ public boolean containsPartitionColumnsOnly(BitSet cols) {
+
+ for (int i = cols.nextSetBit(0); i >= 0; i = cols.nextSetBit(i + 1)) {
+ if (!m_hivePartitionColsMap.containsKey(i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+}
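For partition columns, getDistinctCount above derives the number of distinct values directly from the pruned partition specs rather than from stored column statistics. A stand-alone sketch of the same idea over plain partition-spec maps (all data here is hypothetical):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    public class PartitionNdvSketch {
      static Map<String, String> spec(String ds, String hr) {
        Map<String, String> m = new HashMap<String, String>();
        m.put("ds", ds);
        m.put("hr", hr);
        return m;
      }

      public static void main(String[] args) {
        List<Map<String, String>> partitionSpecs = Arrays.asList(
            spec("2014-05-01", "00"), spec("2014-05-01", "12"), spec("2014-05-02", "00"));

        // Distinct count of the 'ds' partition column = number of distinct spec values.
        Set<String> distinctVals = new HashSet<String>();
        for (Map<String, String> s : partitionSpecs) {
          distinctVals.add(s.get("ds"));
        }
        System.out.println(distinctVals.size()); // 2
      }
    }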
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java
new file mode 100644
index 0000000..d4bd678
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java
@@ -0,0 +1,52 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
+import org.eigenbase.rel.AggregateCall;
+import org.eigenbase.rel.RelCollation;
+import org.eigenbase.rel.RelCollationImpl;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+
+public class TraitsUtil {
+
+ public static RelTraitSet getSelectTraitSet(RelOptCluster cluster, RelNode child) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getSortTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ RelCollation collation) {
+ return traitSet.plus(collation);
+ }
+
+ public static RelTraitSet getFilterTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ RelNode child) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getLimitTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ RelNode child) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getAggregateTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ List<Integer> gbCols, List<AggregateCall> aggCalls, RelNode child) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getTableScanTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ RelOptHiveTable table, RelDataType rowtype) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getJoinTraitSet(RelOptCluster cluster, RelTraitSet traitSet) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getUnionTraitSet(RelOptCluster cluster, RelTraitSet traitSet) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCost.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCost.java
new file mode 100644
index 0000000..34a37e4
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCost.java
@@ -0,0 +1,194 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.cost;
+
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptCostFactory;
+import org.eigenbase.relopt.RelOptUtil;
+
+// TODO: This should inherit from VolcanoCost and should just override isLE method.
+public class HiveCost implements RelOptCost {
+ // ~ Static fields/initializers ---------------------------------------------
+
+ public static final HiveCost INFINITY = new HiveCost(Double.POSITIVE_INFINITY,
+ Double.POSITIVE_INFINITY,
+ Double.POSITIVE_INFINITY) {
+ @Override
+ public String toString() {
+ return "{inf}";
+ }
+ };
+
+ public static final HiveCost HUGE = new HiveCost(Double.MAX_VALUE, Double.MAX_VALUE,
+ Double.MAX_VALUE) {
+ @Override
+ public String toString() {
+ return "{huge}";
+ }
+ };
+
+ public static final HiveCost ZERO = new HiveCost(0.0, 0.0, 0.0) {
+ @Override
+ public String toString() {
+ return "{0}";
+ }
+ };
+
+ public static final HiveCost TINY = new HiveCost(1.0, 1.0, 0.0) {
+ @Override
+ public String toString() {
+ return "{tiny}";
+ }
+ };
+
+ public static final RelOptCostFactory FACTORY = new Factory();
+
+ // ~ Instance fields --------------------------------------------------------
+
+ final double cpu;
+ final double io;
+ final double rowCount;
+
+ // ~ Constructors -----------------------------------------------------------
+
+ HiveCost(double rowCount, double cpu, double io) {
+ assert rowCount >= 0d;
+ assert cpu >= 0d;
+ assert io >= 0d;
+ this.rowCount = rowCount;
+ this.cpu = cpu;
+ this.io = io;
+ }
+
+ // ~ Methods ----------------------------------------------------------------
+
+ public double getCpu() {
+ return cpu;
+ }
+
+ public boolean isInfinite() {
+ return (this == INFINITY) || (this.rowCount == Double.POSITIVE_INFINITY)
+ || (this.cpu == Double.POSITIVE_INFINITY) || (this.io == Double.POSITIVE_INFINITY);
+ }
+
+ public double getIo() {
+ return io;
+ }
+
+ // TODO: If two costs are equal, can we do better than comparing
+ // cardinality (maybe use some other heuristic to break the tie)?
+ public boolean isLe(RelOptCost other) {
+ return this == other || this.rowCount <= other.getRows();
+ /*
+ * if (((this.dCpu + this.dIo) < (other.getCpu() + other.getIo())) ||
+ * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()) && this.dRows
+ * <= other.getRows())) { return true; } else { return false; }
+ */
+ }
+
+ public boolean isLt(RelOptCost other) {
+ return this.rowCount < other.getRows();
+ /*
+ * return isLe(other) && !equals(other);
+ */
+ }
+
+ public double getRows() {
+ return rowCount;
+ }
+
+ public boolean equals(RelOptCost other) {
+ return (this == other) || ((this.rowCount) == (other.getRows()));
+
+ /*
+ * //TODO: should we consider cardinality as well? return (this == other) ||
+ * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()));
+ */
+ }
+
+ public boolean isEqWithEpsilon(RelOptCost other) {
+ return (this == other) || (Math.abs((this.rowCount) - (other.getRows())) < RelOptUtil.EPSILON);
+ /*
+ * return (this == other) || (Math.abs((this.dCpu + this.dIo) -
+ * (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON);
+ */
+ }
+
+ public RelOptCost minus(RelOptCost other) {
+ if (this == INFINITY) {
+ return this;
+ }
+
+ return new HiveCost(this.rowCount - other.getRows(), this.cpu - other.getCpu(), this.io
+ - other.getIo());
+ }
+
+ public RelOptCost multiplyBy(double factor) {
+ if (this == INFINITY) {
+ return this;
+ }
+ return new HiveCost(rowCount * factor, cpu * factor, io * factor);
+ }
+
+ public double divideBy(RelOptCost cost) {
+ // Compute the geometric average of the ratios of all of the factors
+ // which are non-zero and finite.
+ double d = 1;
+ double n = 0;
+ if ((this.rowCount != 0) && !Double.isInfinite(this.rowCount) && (cost.getRows() != 0)
+ && !Double.isInfinite(cost.getRows())) {
+ d *= this.rowCount / cost.getRows();
+ ++n;
+ }
+ if ((this.cpu != 0) && !Double.isInfinite(this.cpu) && (cost.getCpu() != 0)
+ && !Double.isInfinite(cost.getCpu())) {
+ d *= this.cpu / cost.getCpu();
+ ++n;
+ }
+ if ((this.io != 0) && !Double.isInfinite(this.io) && (cost.getIo() != 0)
+ && !Double.isInfinite(cost.getIo())) {
+ d *= this.io / cost.getIo();
+ ++n;
+ }
+ if (n == 0) {
+ return 1.0;
+ }
+ return Math.pow(d, 1 / n);
+ }
+
+ public RelOptCost plus(RelOptCost other) {
+ if ((this == INFINITY) || (other.isInfinite())) {
+ return INFINITY;
+ }
+ return new HiveCost(this.rowCount + other.getRows(), this.cpu + other.getCpu(), this.io
+ + other.getIo());
+ }
+
+ @Override
+ public String toString() {
+ return "{" + rowCount + " rows, " + cpu + " cpu, " + io + " io}";
+ }
+
+ private static class Factory implements RelOptCostFactory {
+ private Factory() {
+ }
+
+ public RelOptCost makeCost(double rowCount, double cpu, double io) {
+ return new HiveCost(rowCount, cpu, io);
+ }
+
+ public RelOptCost makeHugeCost() {
+ return HUGE;
+ }
+
+ public HiveCost makeInfiniteCost() {
+ return INFINITY;
+ }
+
+ public HiveCost makeTinyCost() {
+ return TINY;
+ }
+
+ public HiveCost makeZeroCost() {
+ return ZERO;
+ }
+ }
+}
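divideBy computes the geometric mean of the ratios of the non-zero, finite cost factors; for example, dividing {100 rows, 10 cpu, 0 io} by {50 rows, 5 cpu, 0 io} multiplies the ratios 2 and 2 and takes the square root, giving 2. A stand-alone check of that arithmetic with plain doubles, following the same formula:

    public class GeometricMeanSketch {
      public static void main(String[] args) {
        double[] thisCost = {100.0, 10.0, 0.0};   // rows, cpu, io
        double[] otherCost = {50.0, 5.0, 0.0};

        double d = 1;
        double n = 0;
        for (int i = 0; i < thisCost.length; i++) {
          // Skip factors that are zero or infinite on either side, as HiveCost.divideBy does.
          if (thisCost[i] != 0 && !Double.isInfinite(thisCost[i])
              && otherCost[i] != 0 && !Double.isInfinite(otherCost[i])) {
            d *= thisCost[i] / otherCost[i];
            ++n;
          }
        }
        double result = (n == 0) ? 1.0 : Math.pow(d, 1 / n);
        System.out.println(result); // 2.0
      }
    }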
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCostUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCostUtil.java
new file mode 100644
index 0000000..926bca5
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCostUtil.java
@@ -0,0 +1,23 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.cost;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.eigenbase.relopt.RelOptCost;
+
+public class HiveCostUtil {
+ private static final double cpuCostInNanoSec = 1.0;
+ private static final double netCostInNanoSec = 150 * cpuCostInNanoSec;
+ private static final double localFSWriteCostInNanoSec = 4 * netCostInNanoSec;
+ private static final double localFSReadCostInNanoSec = 4 * netCostInNanoSec;
+ private static final double hDFSWriteCostInNanoSec = 10 * localFSWriteCostInNanoSec;
+ private static final double hDFSReadCostInNanoSec = 1.5 * localFSReadCostInNanoSec;
+
+ public static RelOptCost computCardinalityBasedCost(HiveRel hr) {
+ return new HiveCost(hr.getRows(), 0, 0);
+ }
+
+ public static HiveCost computeCost(HiveTableScanRel t) {
+ double cardinality = t.getRows();
+ return new HiveCost(cardinality, 0, hDFSWriteCostInNanoSec * cardinality * 0);
+ }
+}
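The constants above encode relative unit costs: network transfer is 150x a CPU operation, local file-system I/O is 4x network, an HDFS write is 10x a local write, and an HDFS read is 1.5x a local read. A quick stand-alone computation of the implied multipliers, expressed relative to one CPU operation:

    public class CostRatioSketch {
      public static void main(String[] args) {
        double cpu = 1.0;
        double net = 150 * cpu;                 // 150
        double localFsWrite = 4 * net;          // 600
        double localFsRead = 4 * net;           // 600
        double hdfsWrite = 10 * localFsWrite;   // 6000
        double hdfsRead = 1.5 * localFsRead;    // 900

        System.out.println("net=" + net + " localWrite=" + localFsWrite
            + " hdfsWrite=" + hdfsWrite + " hdfsRead=" + hdfsRead);
      }
    }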
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java
new file mode 100644
index 0000000..15596bc
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java
@@ -0,0 +1,31 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.cost;
+
+import org.eigenbase.rel.RelCollationTraitDef;
+import org.eigenbase.relopt.ConventionTraitDef;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.volcano.VolcanoPlanner;
+
+/**
+ * Refinement of {@link org.eigenbase.relopt.volcano.VolcanoPlanner} for Hive.
+ *
+ *
+ * It uses {@link org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost} as
+ * its cost model.
+ */
+public class HiveVolcanoPlanner extends VolcanoPlanner {
+ private static final boolean ENABLE_COLLATION_TRAIT = true;
+
+ /** Creates a HiveVolcanoPlanner. */
+ public HiveVolcanoPlanner() {
+ super(HiveCost.FACTORY, null);
+ }
+
+ public static RelOptPlanner createPlanner() {
+ final VolcanoPlanner planner = new HiveVolcanoPlanner();
+ planner.addRelTraitDef(ConventionTraitDef.INSTANCE);
+ if (ENABLE_COLLATION_TRAIT) {
+ planner.addRelTraitDef(RelCollationTraitDef.INSTANCE);
+ }
+ return planner;
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveAggregateRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveAggregateRel.java
new file mode 100644
index 0000000..c81fd8a
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveAggregateRel.java
@@ -0,0 +1,70 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.BitSet;
+import java.util.List;
+
+import net.hydromatic.optiq.util.BitSets;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.eigenbase.rel.AggregateCall;
+import org.eigenbase.rel.AggregateRelBase;
+import org.eigenbase.rel.InvalidRelException;
+import org.eigenbase.rel.RelFactories.AggregateFactory;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.metadata.RelMetadataQuery;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelTraitSet;
+
+public class HiveAggregateRel extends AggregateRelBase implements HiveRel {
+
+ public static final HiveAggRelFactory HIVE_AGGR_REL_FACTORY = new HiveAggRelFactory();
+
+ public HiveAggregateRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ BitSet groupSet, List<AggregateCall> aggCalls) throws InvalidRelException {
+ super(cluster, TraitsUtil.getAggregateTraitSet(cluster, traitSet, BitSets.toList(groupSet),
+ aggCalls, child), child, groupSet, aggCalls);
+ }
+
+ @Override
+ public AggregateRelBase copy(RelTraitSet traitSet, RelNode input, BitSet groupSet,
+ List<AggregateCall> aggCalls) {
+ try {
+ return new HiveAggregateRel(getCluster(), traitSet, input, groupSet, aggCalls);
+ } catch (InvalidRelException e) {
+ // Semantic error not possible. Must be a bug. Convert to
+ // internal error.
+ throw new AssertionError(e);
+ }
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public double getRows() {
+ return RelMetadataQuery.getDistinctRowCount(this, groupSet, getCluster().getRexBuilder()
+ .makeLiteral(true));
+ }
+
+ private static class HiveAggRelFactory implements AggregateFactory {
+
+ @Override
+ public RelNode createAggregate(RelNode child, BitSet groupSet,
+ List<AggregateCall> aggCalls) {
+ try {
+ return new HiveAggregateRel(child.getCluster(), child.getTraitSet(), child, groupSet, aggCalls);
+ } catch (InvalidRelException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+}
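getRows above estimates the aggregate's output cardinality as the number of distinct combinations of the grouping keys. A stand-alone illustration of that estimate over sample rows (all data here is hypothetical):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class GroupByCardinalitySketch {
      public static void main(String[] args) {
        // rows of (dept, year, salary); aggregate groups by (dept, year)
        List<String[]> rows = Arrays.asList(
            new String[]{"eng", "2014", "100"},
            new String[]{"eng", "2014", "120"},
            new String[]{"eng", "2013", "90"},
            new String[]{"sales", "2014", "80"});

        Set<String> distinctGroups = new HashSet<String>();
        for (String[] r : rows) {
          distinctGroups.add(r[0] + "|" + r[1]);  // key = grouping columns only
        }
        System.out.println(distinctGroups.size()); // 3 output rows expected from the aggregate
      }
    }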
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveFilterRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveFilterRel.java
new file mode 100644
index 0000000..ebf420d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveFilterRel.java
@@ -0,0 +1,53 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.rel.RelFactories.FilterFactory;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.rex.RexNode;
+
+public class HiveFilterRel extends FilterRelBase implements HiveRel {
+
+ public static final FilterFactory DEFAULT_FILTER_FACTORY = new HiveFilterFactoryImpl();
+
+ public HiveFilterRel(RelOptCluster cluster, RelTraitSet traits, RelNode child, RexNode condition) {
+ super(cluster, TraitsUtil.getFilterTraitSet(cluster, traits, child), child, condition);
+ }
+
+ @Override
+ public FilterRelBase copy(RelTraitSet traitSet, RelNode input, RexNode condition) {
+ assert traitSet.containsIfApplicable(HiveRel.CONVENTION);
+ return new HiveFilterRel(getCluster(), traitSet, input, getCondition());
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ /**
+ * Implementation of {@link FilterFactory} that returns
+ * {@link org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel}
+ * .
+ */
+ private static class HiveFilterFactoryImpl implements FilterFactory {
+ @Override
+ public RelNode createFilter(RelNode child, RexNode condition) {
+ RelOptCluster cluster = child.getCluster();
+ HiveFilterRel filter = new HiveFilterRel(cluster, TraitsUtil.getFilterTraitSet(cluster, null,
+ child), child, condition);
+ return filter;
+ }
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java
new file mode 100644
index 0000000..6f642b2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java
@@ -0,0 +1,138 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.eigenbase.rel.InvalidRelException;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.JoinRelType;
+import org.eigenbase.rel.RelFactories.JoinFactory;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.metadata.RelMetadataQuery;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexNode;
+
+//TODO: Should we convert MultiJoin to be a child of HiveJoinRelBase
+public class HiveJoinRel extends JoinRelBase implements HiveRel {
+ // NOTE: COMMON_JOIN & SMB_JOIN are Sort Merge Join (in case of COMMON_JOIN
+ // each parallel computation handles multiple splits where as in case of SMB
+ // each parallel computation handles one bucket). MAP_JOIN and BUCKET_JOIN is
+ // hash joins where MAP_JOIN keeps the whole data set of non streaming tables
+ // in memory where as BUCKET_JOIN keeps only the b
+ public enum JoinAlgorithm {
+ NONE, COMMON_JOIN, MAP_JOIN, BUCKET_JOIN, SMB_JOIN
+ }
+
+ public enum MapJoinStreamingRelation {
+ NONE, LEFT_RELATION, RIGHT_RELATION
+ }
+
+ public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl();
+
+ private final boolean m_leftSemiJoin;
+ private final JoinAlgorithm m_joinAlgorithm;
+ private final MapJoinStreamingRelation m_mapJoinStreamingSide = MapJoinStreamingRelation.NONE;
+
+ public static HiveJoinRel getJoin(RelOptCluster cluster, RelNode left, RelNode right,
+ RexNode condition, JoinRelType joinType, boolean leftSemiJoin) {
+ try {
+ Set<String> variablesStopped = Collections.emptySet();
+ return new HiveJoinRel(cluster, null, left, right, condition, joinType, variablesStopped,
+ JoinAlgorithm.NONE, null, leftSemiJoin);
+ } catch (InvalidRelException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ protected HiveJoinRel(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right,
+ RexNode condition, JoinRelType joinType, Set<String> variablesStopped,
+ JoinAlgorithm joinAlgo, MapJoinStreamingRelation streamingSideForMapJoin, boolean leftSemiJoin)
+ throws InvalidRelException {
+ super(cluster, TraitsUtil.getJoinTraitSet(cluster, traits), left, right, condition, joinType,
+ variablesStopped);
+ this.m_joinAlgorithm = joinAlgo;
+ m_leftSemiJoin = leftSemiJoin;
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public final HiveJoinRel copy(RelTraitSet traitSet, RexNode conditionExpr, RelNode left,
+ RelNode right, JoinRelType joinType, boolean semiJoinDone) {
+ try {
+ Set<String> variablesStopped = Collections.emptySet();
+ return new HiveJoinRel(getCluster(), traitSet, left, right, conditionExpr, joinType,
+ variablesStopped, JoinAlgorithm.NONE, null, m_leftSemiJoin);
+ } catch (InvalidRelException e) {
+ // Semantic error not possible. Must be a bug. Convert to
+ // internal error.
+ throw new AssertionError(e);
+ }
+ }
+
+ public JoinAlgorithm getJoinAlgorithm() {
+ return m_joinAlgorithm;
+ }
+
+ public boolean isLeftSemiJoin() {
+ return m_leftSemiJoin;
+ }
+
+ /**
+ * Model cost of join as size of Inputs.
+ */
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ double leftRCount = RelMetadataQuery.getRowCount(getLeft());
+ double rightRCount = RelMetadataQuery.getRowCount(getRight());
+ return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0);
+ }
+
+ /**
+ * @return for a left semi-join, the row type of the left input only; otherwise the normal join row type
+ */
+ @Override
+ public RelDataType deriveRowType() {
+ if (m_leftSemiJoin) {
+ return deriveJoinRowType(left.getRowType(), null, JoinRelType.INNER,
+ getCluster().getTypeFactory(), null,
+ Collections.<RelDataTypeField> emptyList());
+ }
+ return super.deriveRowType();
+ }
+
+ private static class HiveJoinFactoryImpl implements JoinFactory {
+ /**
+ * Creates a join.
+ *
+ * @param left
+ * Left input
+ * @param right
+ * Right input
+ * @param condition
+ * Join condition
+ * @param joinType
+ * Join type
+ * @param variablesStopped
+ * Set of names of variables which are set by the LHS and used by
+ * the RHS and are not available to nodes above this JoinRel in the
+ * tree
+ * @param semiJoinDone
+ * Whether this join has been translated to a semi-join
+ */
+ @Override
+ public RelNode createJoin(RelNode left, RelNode right, RexNode condition, JoinRelType joinType,
+ Set<String> variablesStopped, boolean semiJoinDone) {
+ return getJoin(left.getCluster(), left, right, condition, joinType, false);
+ }
+ }
+}
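deriveRowType above emits only the left input's fields for a left semi-join, while a normal join concatenates the left and right fields. A stand-alone illustration with plain field-name lists standing in for row types (hypothetical schemas):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class JoinRowTypeSketch {
      static List<String> deriveRowType(List<String> left, List<String> right, boolean leftSemiJoin) {
        if (leftSemiJoin) {
          return left;                       // a semi-join only emits the left side
        }
        List<String> out = new ArrayList<String>(left);
        out.addAll(right);                   // inner/outer joins emit left fields then right fields
        return out;
      }

      public static void main(String[] args) {
        List<String> left = Arrays.asList("a", "b");
        List<String> right = Arrays.asList("c");
        System.out.println(deriveRowType(left, right, false)); // [a, b, c]
        System.out.println(deriveRowType(left, right, true));  // [a, b]
      }
    }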
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveLimitRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveLimitRel.java
new file mode 100644
index 0000000..bf37a7b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveLimitRel.java
@@ -0,0 +1,40 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.SingleRel;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.rex.RexNode;
+
+public class HiveLimitRel extends SingleRel implements HiveRel {
+ private final RexNode offset;
+ private final RexNode fetch;
+
+ HiveLimitRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, RexNode offset,
+ RexNode fetch) {
+ super(cluster, TraitsUtil.getLimitTraitSet(cluster, traitSet, child), child);
+ this.offset = offset;
+ this.fetch = fetch;
+ assert getConvention() instanceof HiveRel;
+ assert getConvention() == child.getConvention();
+ }
+
+ @Override
+ public HiveLimitRel copy(RelTraitSet traitSet, List<RelNode> newInputs) {
+ return new HiveLimitRel(getCluster(), traitSet, sole(newInputs), offset, fetch);
+ }
+
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java
new file mode 100644
index 0000000..a60af2e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java
@@ -0,0 +1,168 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import com.google.common.collect.ImmutableList;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.eigenbase.rel.ProjectRelBase;
+import org.eigenbase.rel.RelCollation;
+import org.eigenbase.rel.RelFactories.ProjectFactory;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
+import org.eigenbase.util.mapping.Mapping;
+import org.eigenbase.util.mapping.MappingType;
+
+public class HiveProjectRel extends ProjectRelBase implements HiveRel {
+
+ public static final ProjectFactory DEFAULT_PROJECT_FACTORY = new HiveProjectFactoryImpl();
+
+ private final List<Integer> m_virtualCols;
+
+ /**
+ * Creates a HiveProjectRel.
+ *
+ * @param cluster
+ * Cluster this relational expression belongs to
+ * @param child
+ * input relational expression
+ * @param exps
+ * List of expressions for the input columns
+ * @param rowType
+ * output row type
+ * @param flags
+ * values as in {@link ProjectRelBase.Flags}
+ */
+ public HiveProjectRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ List<? extends RexNode> exps, RelDataType rowType, int flags) {
+ super(cluster, traitSet, child, exps, rowType, flags);
+ m_virtualCols = ImmutableList.copyOf(HiveOptiqUtil.getVirtualCols(exps));
+ }
+
+ /**
+ * Creates a HiveProjectRel with no sort keys.
+ *
+ * @param child
+ * input relational expression
+ * @param exps
+ * set of expressions for the input columns
+ * @param fieldNames
+ * aliases of the expressions
+ */
+ public static HiveProjectRel create(RelNode child, List<? extends RexNode> exps, List<String> fieldNames) {
+ RelOptCluster cluster = child.getCluster();
+ RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), exps, fieldNames);
+ return create(cluster, child, exps, rowType, Collections.<RelCollation> emptyList());
+ }
+
+ /**
+ * Creates a HiveProjectRel.
+ */
+ public static HiveProjectRel create(RelOptCluster cluster, RelNode child, List<? extends RexNode> exps,
+ RelDataType rowType, final List<RelCollation> collationList) {
+ RelTraitSet traitSet = TraitsUtil.getSelectTraitSet(cluster, child);
+ return new HiveProjectRel(cluster, traitSet, child, exps, rowType, Flags.BOXED);
+ }
+
+ /**
+ * Creates a relational expression which projects the output fields of a
+ * relational expression according to a partial mapping.
+ *
+ *
+ * A partial mapping is weaker than a permutation: every target has one
+ * source, but a source may have 0, 1 or more than one targets. Usually the
+ * result will have fewer fields than the source, unless some source fields
+ * are projected multiple times.
+ *
+ *
+ * This method could optimize the result as {@link #permute} does, but does
+ * not at present.
+ *
+ * @param rel
+ * Relational expression
+ * @param mapping
+ * Mapping from source fields to target fields. The mapping type must
+ * obey the constraints {@link MappingType#isMandatorySource()} and
+ * {@link MappingType#isSingleSource()}, as does
+ * {@link MappingType#INVERSE_FUNCTION}.
+ * @param fieldNames
+ * Field names; if null, or if a particular entry is null, the name
+ * of the permuted field is used
+ * @return relational expression which projects a subset of the input fields
+ */
+ public static RelNode projectMapping(RelNode rel, Mapping mapping, List<String> fieldNames) {
+ assert mapping.getMappingType().isSingleSource();
+ assert mapping.getMappingType().isMandatorySource();
+
+ if (mapping.isIdentity()) {
+ return rel;
+ }
+
+ final List<String> outputNameList = new ArrayList<String>();
+ final List<RexNode> outputProjList = new ArrayList<RexNode>();
+ final List<RelDataTypeField> fields = rel.getRowType().getFieldList();
+ final RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
+
+ for (int i = 0; i < mapping.getTargetCount(); i++) {
+ int source = mapping.getSource(i);
+ final RelDataTypeField sourceField = fields.get(source);
+ outputNameList
+ .add(((fieldNames == null) || (fieldNames.size() <= i) || (fieldNames.get(i) == null)) ? sourceField
+ .getName() : fieldNames.get(i));
+ outputProjList.add(rexBuilder.makeInputRef(rel, source));
+ }
+
+ return create(rel, outputProjList, outputNameList);
+ }
+
+ @Override
+ public ProjectRelBase copy(RelTraitSet traitSet, RelNode input, List<RexNode> exps,
+ RelDataType rowType) {
+ assert traitSet.containsIfApplicable(HiveRel.CONVENTION);
+ return new HiveProjectRel(getCluster(), traitSet, input, exps, rowType, getFlags());
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ public List<Integer> getVirtualCols() {
+ return m_virtualCols;
+ }
+
+ /**
+ * Implementation of {@link ProjectFactory} that returns
+ * {@link org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel}
+ * .
+ */
+ private static class HiveProjectFactoryImpl implements ProjectFactory {
+
+ @Override
+ public RelNode createProject(RelNode child,
+ List<? extends RexNode> childExprs, List<String> fieldNames) {
+ RelNode project = HiveProjectRel.create(child, childExprs, fieldNames);
+
+ // Make sure extra traits are carried over from the original rel
+ project = RelOptRule.convert(project, child.getTraitSet());
+ return project;
+ }
+ }
+}
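
Note (not part of the patch): a minimal usage sketch of the create(...) factory above; `child` stands for an arbitrary existing HiveRel and the column aliases are hypothetical.

    RexBuilder rexBuilder = child.getCluster().getRexBuilder();
    List<RexNode> exps = ImmutableList.<RexNode>of(
        rexBuilder.makeInputRef(child, 0),
        rexBuilder.makeInputRef(child, 1));
    // Projects the first two columns of child under the aliases "key" and "value".
    HiveProjectRel proj =
        HiveProjectRel.create(child, exps, ImmutableList.of("key", "value"));
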
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveRel.java
new file mode 100644
index 0000000..6f3f1d8
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveRel.java
@@ -0,0 +1,19 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.Convention;
+
+public interface HiveRel extends RelNode {
+ void implement(Implementor implementor);
+
+ /** Calling convention for relational operations that occur in Hive. */
+ final Convention CONVENTION = new Convention.Impl("HIVE", HiveRel.class);
+
+ class Implementor {
+
+ public void visitChild(int ordinal, RelNode input) {
+ assert ordinal == 0;
+ ((HiveRel) input).implement(this);
+ }
+ }
+}
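
For context, the Implementor above is a simple child-first visitor. A concrete single-input operator would typically forward to its input roughly as in this sketch (illustrative only; it assumes the operator extends SingleRel so that getChild() is available):

    @Override
    public void implement(Implementor implementor) {
      // Ordinal 0 is the only child; visitChild asserts this.
      implementor.visitChild(0, getChild());
    }
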
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java
new file mode 100644
index 0000000..3bd7889
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java
@@ -0,0 +1,49 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.eigenbase.rel.RelCollation;
+import org.eigenbase.rel.RelFactories;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.SortRel;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.rex.RexNode;
+
+public class HiveSortRel extends SortRel implements HiveRel {
+
+ public static final HiveSortRelFactory HIVE_SORT_REL_FACTORY = new HiveSortRelFactory();
+
+ public HiveSortRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ RelCollation collation, RexNode offset, RexNode fetch) {
+ super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation,
+ offset, fetch);
+
+ assert getConvention() == child.getConvention();
+ }
+
+ @Override
+ public HiveSortRel copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation,
+ RexNode offset, RexNode fetch) {
+ // TODO: can we blindly copy sort trait? What if inputs changed and we
+ // are now sorting by different cols
+ RelCollation canonizedCollation = traitSet.canonize(newCollation);
+ return new HiveSortRel(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch);
+ }
+
+ public RexNode getFetchExpr() {
+ return fetch;
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ private static class HiveSortRelFactory implements RelFactories.SortFactory {
+
+ @Override
+ public RelNode createSort(RelTraitSet traits, RelNode child,
+ RelCollation collation, RexNode offset, RexNode fetch) {
+ return new HiveSortRel(child.getCluster(), traits, child, collation, offset, fetch);
+ }
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java
new file mode 100644
index 0000000..4fe1735
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java
@@ -0,0 +1,76 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.TableAccessRelBase;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+
+
+/**
+ * Relational expression representing a scan of a Hive table.
+ */
+public class HiveTableScanRel extends TableAccessRelBase implements HiveRel {
+ private List<ColStatistics> m_hiveColStat;
+
+ /**
+ * Creates a HiveTableScan.
+ *
+ * @param cluster
+ * Cluster
+ * @param traitSet
+ * Traits
+ * @param table
+ * Hive table
+ * @param rowtype
+ * Row type of the scan
+ */
+ public HiveTableScanRel(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table,
+ RelDataType rowtype) {
+ super(cluster, TraitsUtil.getTableScanTraitSet(cluster, traitSet, table, rowtype), table);
+ assert getConvention() == HiveRel.CONVENTION;
+ }
+
+ @Override
+ public RelNode copy(RelTraitSet traitSet, List<RelNode> inputs) {
+ assert inputs.isEmpty();
+ return this;
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public void register(RelOptPlanner planner) {
+
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+
+ }
+
+ @Override
+ public double getRows() {
+ return ((RelOptHiveTable) table).getRowCount();
+ }
+
+ public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
+ return ((RelOptHiveTable) table).getColStat(projIndxLst);
+ }
+}
\ No newline at end of file
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java
new file mode 100644
index 0000000..ccd52b0
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java
@@ -0,0 +1,40 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel.Implementor;
+import org.eigenbase.rel.RelFactories;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.SetOpRel;
+import org.eigenbase.rel.UnionRelBase;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.sql.SqlKind;
+
+public class HiveUnionRel extends UnionRelBase {
+
+ public static final HiveUnionRelFactory UNION_REL_FACTORY = new HiveUnionRelFactory();
+
+ public HiveUnionRel(RelOptCluster cluster, RelTraitSet traits, List<RelNode> inputs) {
+ super(cluster, traits, inputs, true);
+ }
+
+ @Override
+ public SetOpRel copy(RelTraitSet traitSet, List<RelNode> inputs, boolean all) {
+ return new HiveUnionRel(this.getCluster(), traitSet, inputs);
+ }
+
+ public void implement(Implementor implementor) {
+ }
+
+ private static class HiveUnionRelFactory implements RelFactories.SetOpFactory {
+
+ @Override
+ public RelNode createSetOp(SqlKind kind, List<RelNode> inputs, boolean all) {
+ if (kind != SqlKind.UNION) {
+ throw new IllegalStateException("Expected to get Set operator of type Union. Found : " + kind);
+ }
+ return new HiveUnionRel(inputs.get(0).getCluster(), inputs.get(0).getTraitSet(), inputs);
+ }
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveMergeProjectRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveMergeProjectRule.java
new file mode 100644
index 0000000..a34b532
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveMergeProjectRule.java
@@ -0,0 +1,12 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
+import org.eigenbase.rel.rules.MergeProjectRule;
+
+public class HiveMergeProjectRule extends MergeProjectRule {
+ public static final HiveMergeProjectRule INSTANCE = new HiveMergeProjectRule();
+
+ public HiveMergeProjectRule() {
+ super(true, HiveProjectRel.DEFAULT_PROJECT_FACTORY);
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java
new file mode 100644
index 0000000..6f06c6a
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelOptRuleCall;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.util.Pair;
+
+public class HivePartitionPrunerRule extends RelOptRule {
+
+ HiveConf conf;
+
+ public HivePartitionPrunerRule(HiveConf conf) {
+ super(operand(HiveFilterRel.class, operand(HiveTableScanRel.class, none())));
+ this.conf = conf;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ HiveFilterRel filter = call.rel(0);
+ HiveTableScanRel tScan = call.rel(1);
+ perform(call, filter, tScan);
+ }
+
+ protected void perform(RelOptRuleCall call, FilterRelBase filter,
+ HiveTableScanRel tScan) {
+
+ RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
+ RexNode predicate = filter.getCondition();
+
+ Pair<RexNode, RexNode> predicates = PartitionPruner
+ .extractPartitionPredicates(filter.getCluster(), hiveTable, predicate);
+ RexNode partColExpr = predicates.left;
+ RexNode remainingExpr = predicates.right;
+ remainingExpr = remainingExpr == null ? filter.getCluster().getRexBuilder()
+ .makeLiteral(true) : remainingExpr;
+ hiveTable.computePartitionList(conf, partColExpr);
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePullUpProjectsAboveJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePullUpProjectsAboveJoinRule.java
new file mode 100644
index 0000000..a48112e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePullUpProjectsAboveJoinRule.java
@@ -0,0 +1,44 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
+import org.eigenbase.rel.ProjectRelBase;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.rules.PullUpProjectsAboveJoinRule;
+import org.eigenbase.relopt.RelOptRuleOperand;
+
+public class HivePullUpProjectsAboveJoinRule extends PullUpProjectsAboveJoinRule {
+
+ public static final HivePullUpProjectsAboveJoinRule BOTH_PROJECT = new HivePullUpProjectsAboveJoinRule(
+ operand(
+ HiveJoinRel.class,
+ operand(
+ ProjectRelBase.class,
+ any()),
+ operand(
+ ProjectRelBase.class,
+ any())),
+ "HivePullUpProjectsAboveJoinRule: with two HiveProjectRel children");
+
+ public static final HivePullUpProjectsAboveJoinRule LEFT_PROJECT = new HivePullUpProjectsAboveJoinRule(
+ operand(
+ HiveJoinRel.class,
+ some(operand(
+ ProjectRelBase.class,
+ any()))),
+ "HivePullUpProjectsAboveJoinRule: with HiveProjectRel on left");
+
+ public static final HivePullUpProjectsAboveJoinRule RIGHT_PROJECT = new HivePullUpProjectsAboveJoinRule(
+ operand(
+ HiveJoinRel.class,
+ operand(RelNode.class,
+ any()),
+ operand(
+ ProjectRelBase.class,
+ any())),
+ "HivePullUpProjectsAboveJoinRule: with HiveProjectRel on right");
+
+ public HivePullUpProjectsAboveJoinRule(RelOptRuleOperand operand, String description) {
+ super(operand, description, HiveProjectRel.DEFAULT_PROJECT_FACTORY);
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
new file mode 100644
index 0000000..f8d1ac1
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
@@ -0,0 +1,276 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.JoinRelType;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelOptRuleCall;
+import org.eigenbase.relopt.RelOptRuleOperand;
+import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.relopt.RelOptUtil.InputFinder;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
+import org.eigenbase.sql.SqlKind;
+import org.eigenbase.util.Holder;
+
+import com.google.common.collect.ImmutableList;
+
+public abstract class HivePushFilterPastJoinRule extends RelOptRule {
+
+ public static final HivePushFilterPastJoinRule FILTER_ON_JOIN = new HivePushFilterPastJoinRule(
+ operand(FilterRelBase.class, operand(HiveJoinRel.class, any())),
+ "HivePushFilterPastJoinRule:filter", true) {
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ HiveFilterRel filter = call.rel(0);
+ HiveJoinRel join = call.rel(1);
+ perform(call, filter, join);
+ }
+ };
+
+ public static final HivePushFilterPastJoinRule JOIN = new HivePushFilterPastJoinRule(
+ operand(HiveJoinRel.class, any()), "HivePushFilterPastJoinRule:no-filter", false) {
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ HiveJoinRel join = call.rel(0);
+ perform(call, null, join);
+ }
+ };
+
+ /** Whether to try to strengthen join-type. */
+ private final boolean smart;
+
+ // ~ Constructors -----------------------------------------------------------
+
+ /**
+ * Creates a PushFilterPastJoinRule with an explicit root operand.
+ */
+ private HivePushFilterPastJoinRule(RelOptRuleOperand operand, String id, boolean smart) {
+ super(operand, "PushFilterRule: " + id);
+ this.smart = smart;
+ }
+
+ // ~ Methods ----------------------------------------------------------------
+
+ protected void perform(RelOptRuleCall call, FilterRelBase filter,
+ JoinRelBase join) {
+ final List<RexNode> joinFilters = RelOptUtil.conjunctions(join
+ .getCondition());
+
+ /*
+ * todo: hb 6/26/14 for left SemiJoin we cannot push predicates yet. The
+ * assertion that num(JoinRel columns) = num(leftSrc) + num(rightSrc)
+ * doesn't hold. So RelOptUtil.classifyFilters fails.
+ */
+ if (((HiveJoinRel) join).isLeftSemiJoin()) {
+ return;
+ }
+
+ if (filter == null) {
+ // There is only the joinRel
+ // make sure it does not match a cartesian product joinRel
+ // (with "true" condition) otherwise this rule will be applied
+ // again on the new cartesian product joinRel.
+ boolean onlyTrueFilter = true;
+ for (RexNode joinFilter : joinFilters) {
+ if (!joinFilter.isAlwaysTrue()) {
+ onlyTrueFilter = false;
+ break;
+ }
+ }
+
+ if (onlyTrueFilter) {
+ return;
+ }
+ }
+
+ final List<RexNode> aboveFilters = filter != null ? RelOptUtil
+ .conjunctions(filter.getCondition()) : ImmutableList.<RexNode> of();
+
+ List<RexNode> leftFilters = new ArrayList<RexNode>();
+ List<RexNode> rightFilters = new ArrayList<RexNode>();
+ int origJoinFiltersSz = joinFilters.size();
+
+ // TODO - add logic to derive additional filters. E.g., from
+ // (t1.a = 1 AND t2.a = 2) OR (t1.b = 3 AND t2.b = 4), you can
+ // derive table filters:
+ // (t1.a = 1 OR t1.b = 3)
+ // (t2.a = 2 OR t2.b = 4)
+
+ // Try to push down above filters. These are typically where clause
+ // filters. They can be pushed down if they are not on the NULL
+ // generating side.
+ boolean filterPushed = false;
+ final Holder<JoinRelType> joinTypeHolder = Holder.of(join.getJoinType());
+ if (RelOptUtil.classifyFilters(join, aboveFilters,
+ join.getJoinType(), true, !join.getJoinType().generatesNullsOnLeft(), !join.getJoinType()
+ .generatesNullsOnRight(), joinFilters, leftFilters, rightFilters, joinTypeHolder, smart)) {
+ filterPushed = true;
+ }
+
+ /*
+ * Any predicates pushed down to joinFilters that aren't equality
+ * conditions are put back into aboveFilters, because Hive doesn't support
+ * non-equi join conditions.
+ */
+ ListIterator<RexNode> filterIter = joinFilters.listIterator();
+ while (filterIter.hasNext()) {
+ RexNode exp = filterIter.next();
+ if (exp instanceof RexCall) {
+ RexCall c = (RexCall) exp;
+ if (c.getOperator().getKind() == SqlKind.EQUALS) {
+ boolean validHiveJoinFilter = true;
+ for (RexNode rn : c.getOperands()) {
+ // NOTE: Hive does not allow a single operand of a join condition to
+ // reference columns from both the left and the right side. For example,
+ // (r1.x=r2.x)=(r1.y=r2.y) is rejected as a join condition.
+ if (filterRefersToBothSidesOfJoin(rn, join)) {
+ validHiveJoinFilter = false;
+ break;
+ }
+ }
+ if (validHiveJoinFilter)
+ continue;
+ }
+ }
+ aboveFilters.add(exp);
+ filterIter.remove();
+ }
+
+ /*
+ * If all pushed filters were put back, then reset filterPushed to false.
+ */
+ if (leftFilters.size() == 0 && rightFilters.size() == 0
+ && joinFilters.size() == origJoinFiltersSz) {
+ filterPushed = false;
+ }
+
+ // Try to push down filters in ON clause. A ON clause filter can only be
+ // pushed down if it does not affect the non-matching set, i.e. it is
+ // not on the side which is preserved.
+ if (RelOptUtil.classifyFilters(join, joinFilters, null, false, !join
+ .getJoinType().generatesNullsOnRight(), !join.getJoinType()
+ .generatesNullsOnLeft(), joinFilters, leftFilters, rightFilters, joinTypeHolder, false)) {
+ filterPushed = true;
+ }
+
+ if (!filterPushed) {
+ return;
+ }
+
+ /*
+ * Remove always true conditions that got pushed down.
+ */
+ removeAlwaysTruePredicates(leftFilters);
+ removeAlwaysTruePredicates(rightFilters);
+ removeAlwaysTruePredicates(joinFilters);
+
+ // create FilterRels on top of the children if any filters were
+ // pushed to them
+ RexBuilder rexBuilder = join.getCluster().getRexBuilder();
+ RelNode leftRel = createFilterOnRel(rexBuilder, join.getLeft(), leftFilters);
+ RelNode rightRel = createFilterOnRel(rexBuilder, join.getRight(),
+ rightFilters);
+
+ // create the new join node referencing the new children and
+ // containing its new join filters (if there are any)
+ RexNode joinFilter;
+
+ if (joinFilters.size() == 0) {
+ // if nothing actually got pushed and there is nothing leftover,
+ // then this rule is a no-op
+ if (leftFilters.isEmpty()
+ && rightFilters.isEmpty()
+ && joinTypeHolder.get() == join.getJoinType()) {
+ return;
+ }
+ joinFilter = rexBuilder.makeLiteral(true);
+ } else {
+ joinFilter = RexUtil.composeConjunction(rexBuilder, joinFilters, true);
+ }
+ RelNode newJoinRel = HiveJoinRel.getJoin(join.getCluster(), leftRel,
+ rightRel, joinFilter, join.getJoinType(), false);
+
+ // create a FilterRel on top of the join if needed
+ RelNode newRel = createFilterOnRel(rexBuilder, newJoinRel, aboveFilters);
+
+ call.transformTo(newRel);
+ }
+
+ /**
+ * If the filter list passed in is non-empty, creates a FilterRel on top of
+ * the existing RelNode; otherwise, just returns the RelNode
+ *
+ * @param rexBuilder
+ * rex builder
+ * @param rel
+ * the RelNode that the filter will be put on top of
+ * @param filters
+ * list of filters
+ * @return new RelNode or existing one if no filters
+ */
+ private RelNode createFilterOnRel(RexBuilder rexBuilder, RelNode rel,
+ List<RexNode> filters) {
+ RexNode andFilters = RexUtil.composeConjunction(rexBuilder, filters, false);
+ if (andFilters.isAlwaysTrue()) {
+ return rel;
+ }
+ return new HiveFilterRel(rel.getCluster(), rel.getCluster().traitSetOf(
+ HiveRel.CONVENTION), rel, andFilters);
+ }
+
+ private void removeAlwaysTruePredicates(List<RexNode> predicates) {
+
+ ListIterator<RexNode> iter = predicates.listIterator();
+ while (iter.hasNext()) {
+ RexNode exp = iter.next();
+ if (isAlwaysTrue(exp)) {
+ iter.remove();
+ }
+ }
+ }
+
+ private boolean isAlwaysTrue(RexNode predicate) {
+ if (predicate instanceof RexCall) {
+ RexCall c = (RexCall) predicate;
+ if (c.getOperator().getKind() == SqlKind.EQUALS) {
+ return isAlwaysTrue(c.getOperands().get(0))
+ && isAlwaysTrue(c.getOperands().get(1));
+ }
+ }
+ return predicate.isAlwaysTrue();
+ }
+
+ private boolean filterRefersToBothSidesOfJoin(RexNode filter, JoinRelBase j) {
+ boolean refersToBothSides = false;
+
+ int joinNoOfProjects = j.getRowType().getFieldCount();
+ BitSet filterProjs = new BitSet(joinNoOfProjects);
+ BitSet allLeftProjs = new BitSet(joinNoOfProjects);
+ BitSet allRightProjs = new BitSet(joinNoOfProjects);
+ allLeftProjs.set(0, j.getInput(0).getRowType().getFieldCount(), true);
+ allRightProjs.set(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects, true);
+
+ InputFinder inputFinder = new InputFinder(filterProjs);
+ filter.accept(inputFinder);
+
+ if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs))
+ refersToBothSides = true;
+
+ return refersToBothSides;
+ }
+}
+
+// End HivePushFilterPastJoinRule.java
+
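To illustrate the net effect of the rule above with a hypothetical inner-join predicate `t1.a = 1 AND t1.x = t2.x AND t1.y + t2.y = 10`: `t1.a = 1` references only the left side and is pushed below the join as a left filter; `t1.x = t2.x` is an equality whose operands each touch a single side, so it remains as the Hive join condition; `t1.y + t2.y = 10` is an equality whose left operand references both sides, so filterRefersToBothSidesOfJoin rejects it and it is moved back above the join, as is any non-equality comparison such as `t1.x < t2.x`.
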
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushJoinThroughJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushJoinThroughJoinRule.java
new file mode 100644
index 0000000..0714bed
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushJoinThroughJoinRule.java
@@ -0,0 +1,37 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel.JoinAlgorithm;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.rules.PushJoinThroughJoinRule;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelOptRuleCall;
+
+public class HivePushJoinThroughJoinRule extends PushJoinThroughJoinRule {
+ public static final RelOptRule RIGHT = new HivePushJoinThroughJoinRule(
+ "Hive PushJoinThroughJoinRule:right", true,
+ HiveJoinRel.class);
+ public static final RelOptRule LEFT = new HivePushJoinThroughJoinRule(
+ "Hive PushJoinThroughJoinRule:left", false,
+ HiveJoinRel.class);
+
+ private HivePushJoinThroughJoinRule(String description, boolean right,
+ Class<? extends JoinRelBase> clazz) {
+ super(description, right, clazz, HiveProjectRel.DEFAULT_PROJECT_FACTORY);
+ }
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ boolean isAMatch = false;
+ final HiveJoinRel topJoin = call.rel(0);
+ final HiveJoinRel bottomJoin = call.rel(1);
+
+ if (!topJoin.isLeftSemiJoin() && topJoin.getJoinAlgorithm() == JoinAlgorithm.NONE
+ && bottomJoin.getJoinAlgorithm() == JoinAlgorithm.NONE) {
+ isAMatch = true;
+ }
+
+ return isAMatch;
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveSwapJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveSwapJoinRule.java
new file mode 100644
index 0000000..10a9cb8
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveSwapJoinRule.java
@@ -0,0 +1,25 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel.JoinAlgorithm;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
+
+import org.eigenbase.rel.rules.SwapJoinRule;
+import org.eigenbase.relopt.RelOptRuleCall;
+
+public class HiveSwapJoinRule extends SwapJoinRule {
+ public static final HiveSwapJoinRule INSTANCE = new HiveSwapJoinRule();
+
+ private HiveSwapJoinRule() {
+ super(HiveJoinRel.class, HiveProjectRel.DEFAULT_PROJECT_FACTORY);
+ }
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ if (call.<HiveJoinRel> rel(0).isLeftSemiJoin())
+ return false;
+ else
+ return super.matches(call)
+ && call.<HiveJoinRel> rel(0).getJoinAlgorithm() == JoinAlgorithm.NONE;
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java
new file mode 100644
index 0000000..d9d94f6
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java
@@ -0,0 +1,197 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexLiteral;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexVisitorImpl;
+import org.eigenbase.sql.fun.SqlStdOperatorTable;
+import org.eigenbase.util.Pair;
+
+public class PartitionPruner {
+
+ /**
+ * Breaks the predicate into 2 pieces. The first piece is the expressions that
+ * only contain partition columns and can be used for Partition Pruning; the
+ * second piece is the predicates that are left.
+ *
+ * @param cluster
+ * @param hiveTable
+ * @param predicate
+ * @return a Pair of expressions, each of which may be null. The first
+ * element contains the expressions that only reference partition
+ * columns; the second contains the remaining predicates.
+ */
+ public static Pair<RexNode, RexNode> extractPartitionPredicates(
+ RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) {
+ RexNode partitionPruningPred = predicate
+ .accept(new ExtractPartPruningPredicate(cluster, hiveTable));
+ RexNode remainingPred = predicate.accept(new ExtractRemainingPredicate(
+ cluster, partitionPruningPred));
+ return new Pair<RexNode, RexNode>(partitionPruningPred, remainingPred);
+ }
+
+ public static class ExtractPartPruningPredicate extends
+ RexVisitorImpl<RexNode> {
+
+ final RelOptHiveTable hiveTable;
+ final RelDataType rType;
+ final Set<String> partCols;
+ final RelOptCluster cluster;
+
+ public ExtractPartPruningPredicate(RelOptCluster cluster,
+ RelOptHiveTable hiveTable) {
+ super(true);
+ this.hiveTable = hiveTable;
+ rType = hiveTable.getRowType();
+ List<FieldSchema> pfs = hiveTable.getHiveTableMD().getPartCols();
+ partCols = new HashSet<String>();
+ for (FieldSchema pf : pfs) {
+ partCols.add(pf.getName());
+ }
+ this.cluster = cluster;
+ }
+
+ @Override
+ public RexNode visitLiteral(RexLiteral literal) {
+ return literal;
+ }
+
+ @Override
+ public RexNode visitInputRef(RexInputRef inputRef) {
+ RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex());
+ if (partCols.contains(f.getName())) {
+ return inputRef;
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public RexNode visitCall(RexCall call) {
+ if (!deep) {
+ return null;
+ }
+
+ List<RexNode> args = new LinkedList<RexNode>();
+ boolean argsPruned = false;
+
+ for (RexNode operand : call.operands) {
+ RexNode n = operand.accept(this);
+ if (n != null) {
+ args.add(n);
+ } else {
+ argsPruned = true;
+ }
+ }
+
+ if (call.getOperator() != SqlStdOperatorTable.AND) {
+ return argsPruned ? null : call;
+ } else {
+ if (args.size() == 0) {
+ return null;
+ } else if (args.size() == 1) {
+ return args.get(0);
+ } else {
+ return cluster.getRexBuilder().makeCall(call.getOperator(), args);
+ }
+ }
+ }
+
+ }
+
+ public static class ExtractRemainingPredicate extends RexVisitorImpl<RexNode> {
+
+ List<RexNode> pruningPredicates;
+ final RelOptCluster cluster;
+
+ public ExtractRemainingPredicate(RelOptCluster cluster,
+ RexNode partPruningExpr) {
+ super(true);
+ this.cluster = cluster;
+ pruningPredicates = new ArrayList<RexNode>();
+ flattenPredicates(partPruningExpr);
+ }
+
+ private void flattenPredicates(RexNode r) {
+ if (r instanceof RexCall
+ && ((RexCall) r).getOperator() == SqlStdOperatorTable.AND) {
+ for (RexNode c : ((RexCall) r).getOperands()) {
+ flattenPredicates(c);
+ }
+ } else {
+ pruningPredicates.add(r);
+ }
+ }
+
+ @Override
+ public RexNode visitLiteral(RexLiteral literal) {
+ return literal;
+ }
+
+ @Override
+ public RexNode visitInputRef(RexInputRef inputRef) {
+ return inputRef;
+ }
+
+ @Override
+ public RexNode visitCall(RexCall call) {
+ if (!deep) {
+ return null;
+ }
+
+ if (call.getOperator() != SqlStdOperatorTable.AND) {
+ if (pruningPredicates.contains(call)) {
+ return null;
+ } else {
+ return call;
+ }
+ }
+
+ List<RexNode> args = new LinkedList<RexNode>();
+
+ for (RexNode operand : call.operands) {
+ RexNode n = operand.accept(this);
+ if (n != null) {
+ args.add(n);
+ }
+ }
+
+ if (args.size() == 0) {
+ return null;
+ } else if (args.size() == 1) {
+ return args.get(0);
+ } else {
+ return cluster.getRexBuilder().makeCall(call.getOperator(), args);
+ }
+ }
+ }
+}
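
As a concrete (hypothetical) example of the split performed above: for a table partitioned by ds, a filter ds = '2014-06-01' AND uid > 10 yields the pruning predicate ds = '2014-06-01' and the remaining predicate uid > 10, whereas ds = '2014-06-01' OR uid > 10 is not separable, so no pruning predicate is produced and the whole disjunction stays in the remaining part. A caller would use it roughly like this sketch (cluster, hiveTable and filterCond are assumed to be in scope):

    Pair<RexNode, RexNode> split =
        PartitionPruner.extractPartitionPredicates(cluster, hiveTable, filterCond);
    RexNode pruningPred = split.left;     // e.g. ds = '2014-06-01'
    RexNode remainingPred = split.right;  // e.g. uid > 10
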
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/CBOTableStatsValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/CBOTableStatsValidator.java
new file mode 100644
index 0000000..370feaa
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/CBOTableStatsValidator.java
@@ -0,0 +1,90 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.stats;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ForwardWalker;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
+
+import com.google.common.collect.ImmutableMap;
+
+public class CBOTableStatsValidator {
+ private final CBOValidateStatsContext m_ctx = new CBOValidateStatsContext();
+
+ public boolean validStats(Operator<? extends OperatorDesc> sinkOp, ParseContext pCtx) {
+ Map<Rule, NodeProcessor> rules = ImmutableMap
+ .<Rule, NodeProcessor> builder()
+ .put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"),
+ new TableScanProcessor()).build();
+
+ Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), rules, m_ctx);
+ GraphWalker fWalker = new ForwardWalker(disp);
+
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(pCtx.getTopOps().values());
+
+ try {
+ fWalker.startWalking(topNodes, null);
+ } catch (SemanticException e) {
+ throw new RuntimeException(e);
+ }
+
+ return (m_ctx.m_tabsWithIncompleteStats.isEmpty());
+ }
+
+ public String getIncompleteStatsTabNames() {
+ StringBuilder sb = new StringBuilder();
+ for (String tabName : m_ctx.m_tabsWithIncompleteStats) {
+ if (sb.length() > 1)
+ sb.append(", ");
+ sb.append(tabName);
+ }
+ return sb.toString();
+ }
+
+ private static NodeProcessor getDefaultProc() {
+ return new NodeProcessor() {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) {
+ return null;
+ // TODO: Shouldn't we throw exception? as this would imply we got an op
+ // tree with no TS
+ }
+ };
+ }
+
+ static class TableScanProcessor implements NodeProcessor {
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) {
+ TableScanOperator tableScanOp = (TableScanOperator) nd;
+ Statistics stats = tableScanOp.getStatistics();
+ int noColsWithStats = (stats != null && stats.getColumnStats() != null) ? stats
+ .getColumnStats().size() : 0;
+ if (noColsWithStats != tableScanOp.getNeededColumns().size()) {
+ ((CBOValidateStatsContext) procCtx).m_tabsWithIncompleteStats.add(tableScanOp.getConf()
+ .getAlias());
+ }
+ return null;
+ }
+ }
+
+ static class CBOValidateStatsContext implements NodeProcessorCtx {
+ private final HashSet<String> m_tabsWithIncompleteStats = new HashSet<String>();
+ }
+}
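
A sketch of how the validator above might be used to guard cost-based optimization (sinkOp, pCtx and the LOG instance are assumed to exist in the calling context):

    CBOTableStatsValidator validator = new CBOTableStatsValidator();
    if (!validator.validStats(sinkOp, pCtx)) {
      // Fall back to the non-CBO path when column stats are incomplete.
      LOG.warn("Skipping CBO; tables with incomplete column stats: "
          + validator.getIncompleteStatsTabNames());
    }
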
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java
new file mode 100644
index 0000000..f8e3238
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java
@@ -0,0 +1,214 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.stats;
+
+import java.util.BitSet;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.rel.ProjectRel;
+import org.eigenbase.rel.ProjectRelBase;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.metadata.RelMetadataQuery;
+import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.relopt.RelOptUtil.InputReferencedVisitor;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
+import org.eigenbase.rex.RexVisitorImpl;
+import org.eigenbase.sql.SqlKind;
+
+public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
+ private final RelNode m_childRel;
+ private final double m_childCardinality;
+
+ protected FilterSelectivityEstimator(RelNode childRel) {
+ super(true);
+ m_childRel = childRel;
+ m_childCardinality = RelMetadataQuery.getRowCount(m_childRel);
+ }
+
+ public Double estimateSelectivity(RexNode predicate) {
+ return predicate.accept(this);
+ }
+
+ public Double visitCall(RexCall call) {
+ if (!deep) {
+ return 1.0;
+ }
+
+ /*
+ * Ignore any predicates on partition columns
+ * because we have already accounted for these in
+ * the Table row count.
+ */
+ if (isPartitionPredicate(call, m_childRel)) {
+ return 1.0;
+ }
+
+ Double selectivity = null;
+ SqlKind op = call.getKind();
+
+ switch (op) {
+ case AND: {
+ selectivity = computeConjunctionSelectivity(call);
+ break;
+ }
+
+ case OR: {
+ selectivity = computeDisjunctionSelectivity(call);
+ break;
+ }
+
+ case NOT_EQUALS: {
+ selectivity = computeNotEqualitySelectivity(call);
+ break;
+ }
+
+ case LESS_THAN_OR_EQUAL:
+ case GREATER_THAN_OR_EQUAL:
+ case LESS_THAN:
+ case GREATER_THAN: {
+ selectivity = ((double) 1 / (double) 3);
+ break;
+ }
+
+ case IN: {
+ selectivity = ((double) 1 / ((double) call.operands.size()));
+ break;
+ }
+
+ default:
+ selectivity = computeFunctionSelectivity(call);
+ }
+
+ return selectivity;
+ }
+
+ /**
+ * Selectivity of "f1(x, y, z) != f2(p, q, r)" ->
+ * "(maxNDV(x,y,z,p,q,r) - 1)/maxNDV(x,y,z,p,q,r)".
+ *
+ * @param call
+ * @return
+ */
+ private Double computeNotEqualitySelectivity(RexCall call) {
+ double tmpNDV = getMaxNDV(call);
+
+ if (tmpNDV > 1)
+ return (tmpNDV - (double) 1) / tmpNDV;
+ else
+ return 1.0;
+ }
+
+ /**
+ * Selectivity of f(x, y, z) -> 1/maxNDV(x, y, z).
+ *
+ * Note that >, >=, <, <=, = ... are considered generic functions and use
+ * this method to find their selectivity.
+ *
+ * @param call
+ * @return
+ */
+ private Double computeFunctionSelectivity(RexCall call) {
+ return 1 / getMaxNDV(call);
+ }
+
+ /**
+ * Disjunction Selectivity -> (1 - (1 - m1/n)(1 - m2/n)) where n is the total
+ * number of tuples from the child and m1, m2 are the expected numbers of
+ * tuples selected by each part of the disjunction predicate.
+ *
+ * Note we compute m1, m2, ... by applying the selectivity of each disjunctive
+ * element to the cardinality coming from the child.
+ *
+ * @param call
+ * @return
+ */
+ private Double computeDisjunctionSelectivity(RexCall call) {
+ Double tmpCardinality;
+ Double tmpSelectivity;
+ double selectivity = 1;
+
+ for (RexNode dje : call.getOperands()) {
+ tmpSelectivity = dje.accept(this);
+ if (tmpSelectivity == null) {
+ tmpSelectivity = 0.99;
+ }
+ tmpCardinality = m_childCardinality * tmpSelectivity;
+
+ if (tmpCardinality > 1)
+ tmpSelectivity = (1 - tmpCardinality / m_childCardinality);
+ else
+ tmpSelectivity = 1.0;
+
+ selectivity *= tmpSelectivity;
+ }
+
+ // selectivity now holds the product of the complements (1 - mi/n);
+ // the disjunction selectivity is its complement.
+ return (1 - selectivity);
+ }
+
+ /**
+ * Selectivity of conjunctive predicate -> (selectivity of conjunctive
+ * element1) * (selectivity of conjunctive element2)...
+ *
+ * @param call
+ * @return
+ */
+ private Double computeConjunctionSelectivity(RexCall call) {
+ Double tmpSelectivity;
+ double selectivity = 1;
+
+ for (RexNode cje : call.getOperands()) {
+ tmpSelectivity = cje.accept(this);
+ if (tmpSelectivity != null) {
+ selectivity *= tmpSelectivity;
+ }
+ }
+
+ return selectivity;
+ }
+
+ private Double getMaxNDV(RexCall call) {
+ double tmpNDV;
+ double maxNDV = 1.0;
+ InputReferencedVisitor irv;
+
+ for (RexNode op : call.getOperands()) {
+ if (op instanceof RexInputRef) {
+ tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(m_childRel,
+ ((RexInputRef) op).getIndex());
+ if (tmpNDV > maxNDV)
+ maxNDV = tmpNDV;
+ } else {
+ irv = new InputReferencedVisitor();
+ irv.apply(op);
+ for (Integer childProjIndx : irv.inputPosReferenced) {
+ tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(m_childRel, childProjIndx);
+ if (tmpNDV > maxNDV)
+ maxNDV = tmpNDV;
+ }
+ }
+ }
+
+ return maxNDV;
+ }
+
+ private boolean isPartitionPredicate(RexNode expr, RelNode r) {
+ if ( r instanceof ProjectRelBase ) {
+ expr = RelOptUtil.pushFilterPastProject(expr, (ProjectRelBase) r);
+ return isPartitionPredicate(expr, ((ProjectRelBase) r).getChild());
+ } else if ( r instanceof FilterRelBase ) {
+ return isPartitionPredicate(expr, ((FilterRelBase) r).getChild());
+ } else if ( r instanceof HiveTableScanRel ) {
+ RelOptHiveTable table = (RelOptHiveTable)
+ ((HiveTableScanRel)r).getTable();
+ BitSet cols = RelOptUtil.InputFinder.bits(expr);
+ return table.containsPartitionColumnsOnly(cols);
+ }
+ return false;
+ }
+}
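
A worked example of the disjunction formula above (numbers are hypothetical): with child cardinality n = 1000 and two disjuncts whose individual selectivities are 0.1 and 0.05, m1 = 100 and m2 = 50, so the combined selectivity is 1 - (1 - 100/1000) * (1 - 50/1000) = 1 - 0.9 * 0.95 = 0.145.
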
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java
new file mode 100644
index 0000000..f3e91bb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java
@@ -0,0 +1,110 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.stats;
+
+import java.util.BitSet;
+import java.util.List;
+
+import net.hydromatic.optiq.BuiltinMethod;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.metadata.ChainedRelMetadataProvider;
+import org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMdDistinctRowCount;
+import org.eigenbase.rel.metadata.RelMdUtil;
+import org.eigenbase.rel.metadata.RelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMetadataQuery;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.rex.RexNode;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveRelMdDistinctRowCount extends RelMdDistinctRowCount {
+
+ private static final HiveRelMdDistinctRowCount INSTANCE =
+ new HiveRelMdDistinctRowCount();
+
+ public static final RelMetadataProvider SOURCE = ChainedRelMetadataProvider
+ .of(ImmutableList.of(
+
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltinMethod.DISTINCT_ROW_COUNT.method, INSTANCE),
+
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltinMethod.CUMULATIVE_COST.method, INSTANCE)));
+
+ private HiveRelMdDistinctRowCount() {
+ }
+
+ // Catch-all rule when none of the others apply.
+ @Override
+ public Double getDistinctRowCount(RelNode rel, BitSet groupKey,
+ RexNode predicate) {
+ if (rel instanceof HiveTableScanRel) {
+ return getDistinctRowCount((HiveTableScanRel) rel, groupKey, predicate);
+ }
+ /*
+ * For now use Optiq's default formulas for propagating NDVs up the query
+ * tree.
+ */
+ return super.getDistinctRowCount(rel, groupKey, predicate);
+ }
+
+ private Double getDistinctRowCount(HiveTableScanRel htRel, BitSet groupKey,
+ RexNode predicate) {
+ List<Integer> projIndxLst = HiveOptiqUtil
+ .translateBitSetToProjIndx(groupKey);
+ List<ColStatistics> colStats = htRel.getColStat(projIndxLst);
+ Double noDistinctRows = 1.0;
+ for (ColStatistics cStat : colStats) {
+ noDistinctRows *= cStat.getCountDistint();
+ }
+
+ return Math.min(noDistinctRows, htRel.getRows());
+ }
+
+ public static Double getDistinctRowCount(RelNode r, int indx) {
+ BitSet bitSetOfRqdProj = new BitSet();
+ bitSetOfRqdProj.set(indx);
+ return RelMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r
+ .getCluster().getRexBuilder().makeLiteral(true));
+ }
+
+ @Override
+ public Double getDistinctRowCount(JoinRelBase rel, BitSet groupKey,
+ RexNode predicate) {
+ if (rel instanceof HiveJoinRel) {
+ HiveJoinRel hjRel = (HiveJoinRel) rel;
+ //TODO: Improve this
+ if (hjRel.isLeftSemiJoin()) {
+ return RelMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey,
+ rel.getCluster().getRexBuilder().makeLiteral(true));
+ } else {
+ return RelMdUtil.getJoinDistinctRowCount(rel, rel.getJoinType(),
+ groupKey, predicate, true);
+ }
+ }
+
+ return RelMetadataQuery.getDistinctRowCount(rel, groupKey, predicate);
+ }
+
+ /*
+ * Favor Broad Plans over Deep Plans.
+ */
+ public RelOptCost getCumulativeCost(HiveJoinRel rel) {
+ RelOptCost cost = RelMetadataQuery.getNonCumulativeCost(rel);
+ List<RelNode> inputs = rel.getInputs();
+ RelOptCost maxICost = HiveCost.ZERO;
+ for (RelNode input : inputs) {
+ RelOptCost iCost = RelMetadataQuery.getCumulativeCost(input);
+ if (maxICost.isLt(iCost)) {
+ maxICost = iCost;
+ }
+ }
+ return cost.plus(maxICost);
+ }
+}
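
For a table scan, the override above estimates the NDV of a grouping key as the product of the per-column distinct counts, capped at the table row count. A hypothetical example: grouping on (c1, c2) with NDV(c1) = 100, NDV(c2) = 50 and 2,000 rows gives min(100 * 50, 2000) = 2000.
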
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java
new file mode 100644
index 0000000..df70de2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java
@@ -0,0 +1,184 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.stats;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.hydromatic.optiq.BuiltinMethod;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.JoinUtil.JoinLeafPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.optiq.JoinUtil.JoinPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.eigenbase.rel.JoinRelType;
+import org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMdSelectivity;
+import org.eigenbase.rel.metadata.RelMdUtil;
+import org.eigenbase.rel.metadata.RelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMetadataQuery;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
+
+import com.google.common.collect.ImmutableMap;
+
+public class HiveRelMdSelectivity extends RelMdSelectivity {
+ public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltinMethod.SELECTIVITY.method,
+ new HiveRelMdSelectivity());
+
+ protected HiveRelMdSelectivity() {
+ super();
+ }
+
+ public Double getSelectivity(HiveTableScanRel t, RexNode predicate) {
+ if (predicate != null) {
+ FilterSelectivityEstimator filterSelEstmator = new FilterSelectivityEstimator(t);
+ return filterSelEstmator.estimateSelectivity(predicate);
+ }
+
+ return 1.0;
+ }
+
+ public Double getSelectivity(HiveJoinRel j, RexNode predicate) {
+ if (j.getJoinType().equals(JoinRelType.INNER)) {
+ return computeInnerJoinSelectivity(j, predicate);
+ }
+ return 1.0;
+ }
+
+ private Double computeInnerJoinSelectivity(HiveJoinRel j, RexNode predicate) {
+ double ndvCrossProduct = 1;
+ RexNode combinedPredicate = getCombinedPredicateForJoin(j, predicate);
+ JoinPredicateInfo jpi = JoinPredicateInfo.constructJoinPredicateInfo(j,
+ combinedPredicate);
+ ImmutableMap.Builder<Integer, Double> colStatMapBuilder = ImmutableMap
+ .builder();
+ ImmutableMap<Integer, Double> colStatMap;
+ int rightOffSet = j.getLeft().getRowType().getFieldCount();
+
+ // 1. Update Col Stats Map with col stats for columns from left side of
+ // Join which are part of join keys
+ for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
+ colStatMapBuilder.put(ljk,
+ HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), ljk));
+ }
+
+ // 2. Update Col Stats Map with col stats for columns from right side of
+ // Join which are part of join keys
+ for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
+ colStatMapBuilder.put(rjk + rightOffSet,
+ HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), rjk));
+ }
+ colStatMap = colStatMapBuilder.build();
+
+ // 3. Walk through the join condition, building up the NDV used for
+ // selectivity. The NDV of the join cannot exceed the cardinality of the
+ // cross join.
+ List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements();
+ int noOfPE = peLst.size();
+ if (noOfPE > 0) {
+ // 3.1 Use first conjunctive predicate element's NDV as the seed
+ ndvCrossProduct = getMaxNDVForJoinSelectivity(peLst.get(0), colStatMap);
+
+ // 3.2 If there is more than one conjunctive predicate element, walk
+ // through them one by one and compute the NDV cross product: the largest
+ // NDV among all conjunctive predicate elements is multiplied by the
+ // degraded (log-smoothed) NDVs of the remaining elements. Finally,
+ // ndvCrossProduct is fenced at the cross-join cardinality so that the
+ // NDV cannot exceed the worst-case join cardinality.
+ // The NDV of a conjunctive predicate element is the max NDV over all
+ // arguments of its lhs and rhs expressions:
+ // NDV(JoinCondition) = min(left cardinality * right cardinality,
+ // ndvCrossProduct(JoinCondition))
+ // ndvCrossProduct(JoinCondition) = ndv(pex) * log(ndv(pe1)) * log(ndv(pe2))
+ // where pex is the predicate element of the join condition with max ndv.
+ // ndv(pe) = max(NDV(left.Expr), NDV(right.Expr))
+ // NDV(expr) = max(NDV(expr args))
+ if (noOfPE > 1) {
+ double maxNDVSoFar = ndvCrossProduct;
+ double ndvToBeSmoothed;
+ double tmpNDV;
+
+ for (int i = 1; i < noOfPE; i++) {
+ tmpNDV = getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap);
+ if (tmpNDV > maxNDVSoFar) {
+ ndvToBeSmoothed = maxNDVSoFar;
+ maxNDVSoFar = tmpNDV;
+ ndvCrossProduct = (ndvCrossProduct / ndvToBeSmoothed) * tmpNDV;
+ } else {
+ ndvToBeSmoothed = tmpNDV;
+ }
+ // TODO: revisit the fence
+ if (ndvToBeSmoothed > 3)
+ ndvCrossProduct *= Math.log(ndvToBeSmoothed);
+ else
+ ndvCrossProduct *= ndvToBeSmoothed;
+ }
+
+ if (j.isLeftSemiJoin())
+ ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()),
+ ndvCrossProduct);
+ else
+ ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft())
+ * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct);
+ }
+ }
+
+ // 4. Join Selectivity = 1/NDV
+ return (1 / ndvCrossProduct);
+ }
+
+ private RexNode getCombinedPredicateForJoin(HiveJoinRel j, RexNode additionalPredicate) {
+ RexNode minusPred = RelMdUtil.minusPreds(j.getCluster().getRexBuilder(), additionalPredicate,
+ j.getCondition());
+
+ if (minusPred != null) {
+ List<RexNode> minusList = new ArrayList<RexNode>();
+ minusList.add(j.getCondition());
+ minusList.add(minusPred);
+
+ return RexUtil.composeConjunction(j.getCluster().getRexBuilder(), minusList, true);
+ }
+
+ return j.getCondition();
+ }
+
+ /**
+ * Compute Max NDV to determine Join Selectivity.
+ *
+ * @param jlpi
+ * @param colStatMap
+ * Immutable Map of Projection Index (in Join Schema) to Column Stat
+ * @return
+ */
+ private static Double getMaxNDVForJoinSelectivity(JoinLeafPredicateInfo jlpi,
+ ImmutableMap<Integer, Double> colStatMap) {
+ Double maxNDVSoFar = 1.0;
+
+ maxNDVSoFar = getMaxNDVFromProjections(colStatMap,
+ jlpi.getProjsFromLeftPartOfJoinKeysInJoinSchema(), maxNDVSoFar);
+ maxNDVSoFar = getMaxNDVFromProjections(colStatMap,
+ jlpi.getProjsFromRightPartOfJoinKeysInJoinSchema(), maxNDVSoFar);
+
+ return maxNDVSoFar;
+ }
+
+ private static Double getMaxNDVFromProjections(Map<Integer, Double> colStatMap,
+ Set<Integer> projectionSet, Double defaultMaxNDV) {
+ Double colNDV = null;
+ Double maxNDVSoFar = defaultMaxNDV;
+
+ for (Integer projIndx : projectionSet) {
+ colNDV = colStatMap.get(projIndx);
+ if (colNDV > maxNDVSoFar)
+ maxNDVSoFar = colNDV;
+ }
+
+ return maxNDVSoFar;
+ }
+}
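
A hypothetical walk-through of computeInnerJoinSelectivity above: for a join condition l.x = r.x AND l.y = r.y with per-element max NDVs of 1000 (on x) and 400 (on y), the first element seeds ndvCrossProduct = 1000; the second is smaller, so it is log-smoothed with the natural log, giving 1000 * ln(400) ≈ 5991; the result is then fenced at |L| * |R| (assumed not to bind here), and the reported join selectivity is 1 / 5991.
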
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java
new file mode 100644
index 0000000..f9c0aeb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java
@@ -0,0 +1,200 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.sql.Date;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.eigenbase.rel.JoinRelType;
+import org.eigenbase.rel.TableAccessRelBase;
+import org.eigenbase.rex.RexLiteral;
+import org.eigenbase.sql.type.SqlTypeName;
+
+class ASTBuilder {
+
+ static ASTBuilder construct(int tokenType, String text) {
+ ASTBuilder b = new ASTBuilder();
+ b.curr = createAST(tokenType, text);
+ return b;
+ }
+
+ static ASTNode createAST(int tokenType, String text) {
+ return (ASTNode) ParseDriver.adaptor.create(tokenType, text);
+ }
+
+ static ASTNode destNode() {
+ return ASTBuilder
+ .construct(HiveParser.TOK_DESTINATION, "TOK_DESTINATION")
+ .add(
+ ASTBuilder.construct(HiveParser.TOK_DIR, "TOK_DIR").add(HiveParser.TOK_TMP_FILE,
+ "TOK_TMP_FILE")).node();
+ }
+
+ static ASTNode table(TableAccessRelBase scan) {
+ RelOptHiveTable hTbl = (RelOptHiveTable) scan.getTable();
+ ASTBuilder b = ASTBuilder
+ .construct(HiveParser.TOK_TABREF, "TOK_TABREF")
+ .add(
+ ASTBuilder.construct(HiveParser.TOK_TABNAME, "TOK_TABNAME")
+ .add(HiveParser.Identifier, hTbl.getHiveTableMD().getDbName())
+ .add(HiveParser.Identifier, hTbl.getHiveTableMD().getTableName()))
+ .add(HiveParser.Identifier, hTbl.getName());
+ return b.node();
+ }
+
+ static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond,
+ boolean semiJoin) {
+ ASTBuilder b = null;
+
+ switch (joinType) {
+ case INNER:
+ if (semiJoin) {
+ b = ASTBuilder.construct(HiveParser.TOK_LEFTSEMIJOIN, "TOK_LEFTSEMIJOIN");
+ } else {
+ b = ASTBuilder.construct(HiveParser.TOK_JOIN, "TOK_JOIN");
+ }
+ break;
+ case LEFT:
+ b = ASTBuilder.construct(HiveParser.TOK_LEFTOUTERJOIN, "TOK_LEFTOUTERJOIN");
+ break;
+ case RIGHT:
+ b = ASTBuilder.construct(HiveParser.TOK_RIGHTOUTERJOIN, "TOK_RIGHTOUTERJOIN");
+ break;
+ case FULL:
+ b = ASTBuilder.construct(HiveParser.TOK_FULLOUTERJOIN, "TOK_FULLOUTERJOIN");
+ break;
+ }
+
+ b.add(left).add(right).add(cond);
+ return b.node();
+ }
+
+ static ASTNode subQuery(ASTNode qry, String alias) {
+ return ASTBuilder.construct(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY").add(qry)
+ .add(HiveParser.Identifier, alias).node();
+ }
+
+ static ASTNode qualifiedName(String tableName, String colName) {
+ ASTBuilder b = ASTBuilder
+ .construct(HiveParser.DOT, ".")
+ .add(
+ ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add(
+ HiveParser.Identifier, tableName)).add(HiveParser.Identifier, colName);
+ return b.node();
+ }
+
+ static ASTNode unqualifiedName(String colName) {
+ ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add(
+ HiveParser.Identifier, colName);
+ return b.node();
+ }
+
+ static ASTNode where(ASTNode cond) {
+ return ASTBuilder.construct(HiveParser.TOK_WHERE, "TOK_WHERE").add(cond).node();
+ }
+
+ static ASTNode having(ASTNode cond) {
+ return ASTBuilder.construct(HiveParser.TOK_HAVING, "TOK_HAVING").add(cond).node();
+ }
+
+ static ASTNode limit(Object value) {
+ return ASTBuilder.construct(HiveParser.TOK_LIMIT, "TOK_LIMIT")
+ .add(HiveParser.Number, value.toString()).node();
+ }
+
+ static ASTNode selectExpr(ASTNode expr, String alias) {
+ return ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR").add(expr)
+ .add(HiveParser.Identifier, alias).node();
+ }
+
+ static ASTNode literal(RexLiteral literal) {
+ Object val = null;
+ int type = 0;
+ SqlTypeName sqlType = literal.getType().getSqlTypeName();
+
+ switch (sqlType) {
+ case TINYINT:
+ val = literal.getValue3();
+ type = HiveParser.TinyintLiteral;
+ break;
+ case SMALLINT:
+ val = literal.getValue3();
+ type = HiveParser.SmallintLiteral;
+ break;
+ case INTEGER:
+ case BIGINT:
+ val = literal.getValue3();
+ type = HiveParser.BigintLiteral;
+ break;
+ case DECIMAL:
+ case FLOAT:
+ case DOUBLE:
+ case REAL:
+ val = literal.getValue3();
+ type = HiveParser.Number;
+ break;
+ case VARCHAR:
+ case CHAR:
+ val = literal.getValue3();
+ type = HiveParser.StringLiteral;
+ val = "'" + String.valueOf(val) + "'";
+ break;
+ case BOOLEAN:
+ val = literal.getValue3();
+ type = ((Boolean) val).booleanValue() ? HiveParser.KW_TRUE : HiveParser.KW_FALSE;
+ break;
+ case DATE: {
+ val = literal.getValue();
+ type = HiveParser.TOK_DATELITERAL;
+ DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
+ val = df.format(((Calendar) val).getTime());
+ val = "'" + val + "'";
+ }
+ break;
+ case TIME:
+ case TIMESTAMP: {
+ val = literal.getValue();
+ type = HiveParser.TOK_TIMESTAMP;
+ DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ val = df.format(((Calendar) val).getTime());
+ val = "'" + val + "'";
+ }
+ break;
+ case NULL:
+ type = HiveParser.TOK_NULL;
+ break;
+
+ default:
+ throw new RuntimeException("Unsupported Type: " + sqlType);
+ }
+
+ return (ASTNode) ParseDriver.adaptor.create(type, String.valueOf(val));
+ }
+
+ ASTNode curr;
+
+ ASTNode node() {
+ return curr;
+ }
+
+ ASTBuilder add(int tokenType, String text) {
+ ParseDriver.adaptor.addChild(curr, createAST(tokenType, text));
+ return this;
+ }
+
+ ASTBuilder add(ASTBuilder b) {
+ ParseDriver.adaptor.addChild(curr, b.curr);
+ return this;
+ }
+
+ ASTBuilder add(ASTNode n) {
+ if (n != null) {
+ ParseDriver.adaptor.addChild(curr, n);
+ }
+ return this;
+ }
+}
\ No newline at end of file
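For illustration, a minimal sketch of how the fluent ASTBuilder API composes (not part of the patch; it assumes HiveParser.EQUAL and HiveParser.Number are the token types for "=" and numeric literals):

    // Build the AST for "WHERE t1.c = 1" from within the translator package.
    ASTNode eq = ASTBuilder.construct(HiveParser.EQUAL, "=")
        .add(ASTBuilder.qualifiedName("t1", "c"))
        .add(HiveParser.Number, "1").node();
    ASTNode whereClause = ASTBuilder.where(eq);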
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java
new file mode 100644
index 0000000..cfa032c
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java
@@ -0,0 +1,584 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+
+import net.hydromatic.optiq.util.BitSets;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter.HiveToken;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.eigenbase.rel.AggregateCall;
+import org.eigenbase.rel.AggregateRelBase;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.ProjectRelBase;
+import org.eigenbase.rel.RelFieldCollation;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.RelVisitor;
+import org.eigenbase.rel.SortRel;
+import org.eigenbase.rel.TableAccessRelBase;
+import org.eigenbase.rel.UnionRelBase;
+import org.eigenbase.rel.rules.SemiJoinRel;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexFieldCollation;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexLiteral;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexOver;
+import org.eigenbase.rex.RexVisitorImpl;
+import org.eigenbase.rex.RexWindow;
+import org.eigenbase.rex.RexWindowBound;
+import org.eigenbase.sql.SqlKind;
+import org.eigenbase.sql.SqlOperator;
+import org.eigenbase.sql.type.BasicSqlType;
+import org.eigenbase.sql.type.SqlTypeName;
+
+import com.google.common.collect.Iterables;
+
+public class ASTConverter {
+
+ RelNode root;
+ HiveAST hiveAST;
+ RelNode from;
+ FilterRelBase where;
+ AggregateRelBase groupBy;
+ FilterRelBase having;
+ ProjectRelBase select;
+ SortRel order;
+
+ Schema schema;
+
+ ASTConverter(RelNode root) {
+ this.root = root;
+ hiveAST = new HiveAST();
+ }
+
+ public static ASTNode convert(final RelNode relNode, List<FieldSchema> resultSchema) {
+ SortRel sortrel = null;
+ RelNode root = DerivedTableInjector.convertOpTree(relNode, resultSchema);
+
+ if (root instanceof SortRel) {
+ sortrel = (SortRel) root;
+ root = sortrel.getChild();
+ if (!(root instanceof ProjectRelBase))
+ throw new RuntimeException("Child of root sort node is not a project");
+ }
+
+ ASTConverter c = new ASTConverter(root);
+ return c.convert(sortrel);
+ }
+
+ public ASTNode convert(SortRel sortrel) {
+ /*
+ * 1. Walk RelNode Graph; note from, where, gBy.. nodes.
+ */
+ new QBVisitor().go(root);
+
+ /*
+ * 2. convert from node.
+ */
+ QueryBlockInfo qb = convertSource(from);
+ schema = qb.schema;
+ hiveAST.from = ASTBuilder.construct(HiveParser.TOK_FROM, "TOK_FROM").add(qb.ast).node();
+
+ /*
+ * 3. convert filterNode
+ */
+ if (where != null) {
+ ASTNode cond = where.getCondition().accept(new RexVisitor(schema));
+ hiveAST.where = ASTBuilder.where(cond);
+ }
+
+ /*
+ * 4. GBy
+ */
+ if (groupBy != null) {
+ ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_GROUPBY, "TOK_GROUPBY");
+ for (int i : BitSets.toIter(groupBy.getGroupSet())) {
+ RexInputRef iRef = new RexInputRef(i, new BasicSqlType(SqlTypeName.ANY));
+ b.add(iRef.accept(new RexVisitor(schema)));
+ }
+
+ if (!groupBy.getGroupSet().isEmpty())
+ hiveAST.groupBy = b.node();
+ schema = new Schema(schema, groupBy);
+ }
+
+ /*
+ * 5. Having
+ */
+ if (having != null) {
+ ASTNode cond = having.getCondition().accept(new RexVisitor(schema));
+ hiveAST.having = ASTBuilder.having(cond);
+ }
+
+ /*
+ * 6. Project
+ */
+ ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_SELECT, "TOK_SELECT");
+ int i = 0;
+
+ for (RexNode r : select.getChildExps()) {
+ ASTNode selectExpr = ASTBuilder.selectExpr(r.accept(new RexVisitor(schema)), select
+ .getRowType().getFieldNames().get(i++));
+ b.add(selectExpr);
+ }
+ hiveAST.select = b.node();
+
+ /*
+ * 7. Order By. A RelNode has no pointer to its parent, so we traverse top
+ * down; but the OB at each block really belongs to its src/from. Hence the
+ * need to pass in the sortRel for each block from its parent.
+ */
+ if (sortrel != null) {
+ HiveSortRel hiveSort = (HiveSortRel) sortrel;
+ if (!hiveSort.getCollation().getFieldCollations().isEmpty()) {
+ ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY");
+ schema = new Schema((HiveSortRel) sortrel);
+ for (RelFieldCollation c : hiveSort.getCollation().getFieldCollations()) {
+ ColumnInfo cI = schema.get(c.getFieldIndex());
+ /*
+ * The RowResolver setup for Select drops Table associations. So setup
+ * ASTNode on unqualified name.
+ */
+ ASTNode astCol = ASTBuilder.unqualifiedName(cI.column);
+ ASTNode astNode = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder
+ .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder
+ .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC");
+ astNode.addChild(astCol);
+ orderAst.addChild(astNode);
+ }
+ hiveAST.order = orderAst;
+ }
+ RexNode limitExpr = hiveSort.getFetchExpr();
+ if (limitExpr != null) {
+ Object val = ((RexLiteral) limitExpr).getValue2();
+ hiveAST.limit = ASTBuilder.limit(val);
+ }
+
+ }
+
+ return hiveAST.getAST();
+ }
+
+ private Schema getRowSchema(String tblAlias) {
+ return new Schema(select, tblAlias);
+ }
+
+ private QueryBlockInfo convertSource(RelNode r) {
+ Schema s;
+ ASTNode ast;
+
+ if (r instanceof TableAccessRelBase) {
+ TableAccessRelBase f = (TableAccessRelBase) r;
+ s = new Schema(f);
+ ast = ASTBuilder.table(f);
+ } else if (r instanceof JoinRelBase) {
+ JoinRelBase join = (JoinRelBase) r;
+ QueryBlockInfo left = convertSource(join.getLeft());
+ QueryBlockInfo right = convertSource(join.getRight());
+ s = new Schema(left.schema, right.schema);
+ ASTNode cond = join.getCondition().accept(new RexVisitor(s));
+ boolean semiJoin = join instanceof SemiJoinRel;
+ ast = ASTBuilder.join(left.ast, right.ast, join.getJoinType(), cond, semiJoin);
+ if (semiJoin)
+ s = left.schema;
+ } else if (r instanceof UnionRelBase) {
+ RelNode leftInput = ((UnionRelBase) r).getInput(0);
+ RelNode rightInput = ((UnionRelBase) r).getInput(1);
+
+ ASTConverter leftConv = new ASTConverter(leftInput);
+ ASTConverter rightConv = new ASTConverter(rightInput);
+ ASTNode leftAST = leftConv.convert((SortRel) null);
+ ASTNode rightAST = rightConv.convert((SortRel) null);
+
+ ASTNode unionAST = getUnionAllAST(leftAST, rightAST);
+
+ String sqAlias = ASTConverter.nextAlias();
+ ast = ASTBuilder.subQuery(unionAST, sqAlias);
+ s = new Schema((UnionRelBase) r, sqAlias);
+ } else {
+ ASTConverter src = new ASTConverter(r);
+ ASTNode srcAST = src.convert(order);
+ String sqAlias = ASTConverter.nextAlias();
+ s = src.getRowSchema(sqAlias);
+ ast = ASTBuilder.subQuery(srcAST, sqAlias);
+ }
+ return new QueryBlockInfo(s, ast);
+ }
+
+ class QBVisitor extends RelVisitor {
+
+ public void handle(FilterRelBase filter) {
+ RelNode child = filter.getChild();
+ if (child instanceof AggregateRelBase && !((AggregateRelBase) child).getGroupSet().isEmpty()) {
+ ASTConverter.this.having = filter;
+ } else {
+ ASTConverter.this.where = filter;
+ }
+ }
+
+ public void handle(ProjectRelBase project) {
+ if (ASTConverter.this.select == null) {
+ ASTConverter.this.select = project;
+ } else {
+ ASTConverter.this.from = project;
+ }
+ }
+
+ @Override
+ public void visit(RelNode node, int ordinal, RelNode parent) {
+
+ if (node instanceof TableAccessRelBase) {
+ ASTConverter.this.from = node;
+ } else if (node instanceof FilterRelBase) {
+ handle((FilterRelBase) node);
+ } else if (node instanceof ProjectRelBase) {
+ handle((ProjectRelBase) node);
+ } else if (node instanceof JoinRelBase) {
+ ASTConverter.this.from = node;
+ } else if (node instanceof UnionRelBase) {
+ ASTConverter.this.from = node;
+ } else if (node instanceof AggregateRelBase) {
+ ASTConverter.this.groupBy = (AggregateRelBase) node;
+ } else if (node instanceof SortRel) {
+ ASTConverter.this.order = (SortRel) node;
+ }
+ /*
+ * once the source node is reached, stop traversal for this QB
+ */
+ if (ASTConverter.this.from == null) {
+ node.childrenAccept(this);
+ }
+ }
+
+ }
+
+ static class RexVisitor extends RexVisitorImpl<ASTNode> {
+
+ private final Schema schema;
+
+ protected RexVisitor(Schema schema) {
+ super(true);
+ this.schema = schema;
+ }
+
+ @Override
+ public ASTNode visitInputRef(RexInputRef inputRef) {
+ ColumnInfo cI = schema.get(inputRef.getIndex());
+ if (cI.agg != null) {
+ return (ASTNode) ParseDriver.adaptor.dupTree(cI.agg);
+ }
+ return ASTBuilder.qualifiedName(cI.table, cI.column);
+ }
+
+ @Override
+ public ASTNode visitLiteral(RexLiteral literal) {
+ return ASTBuilder.literal(literal);
+ }
+
+ private ASTNode getPSpecAST(RexWindow window) {
+ ASTNode pSpecAst = null;
+
+ ASTNode dByAst = null;
+ if (window.partitionKeys != null && !window.partitionKeys.isEmpty()) {
+ dByAst = ASTBuilder.createAST(HiveParser.TOK_DISTRIBUTEBY, "TOK_DISTRIBUTEBY");
+ for (RexNode pk : window.partitionKeys) {
+ ASTNode astCol = pk.accept(this);
+ dByAst.addChild(astCol);
+ }
+ }
+
+ ASTNode oByAst = null;
+ if (window.orderKeys != null && !window.orderKeys.isEmpty()) {
+ oByAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY");
+ for (RexFieldCollation ok : window.orderKeys) {
+ ASTNode astNode = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder
+ .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder
+ .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC");
+ ASTNode astCol = ok.left.accept(this);
+ astNode.addChild(astCol);
+ oByAst.addChild(astNode);
+ }
+ }
+
+ if (dByAst != null || oByAst != null) {
+ pSpecAst = ASTBuilder.createAST(HiveParser.TOK_PARTITIONINGSPEC, "TOK_PARTITIONINGSPEC");
+ if (dByAst != null)
+ pSpecAst.addChild(dByAst);
+ if (oByAst != null)
+ pSpecAst.addChild(oByAst);
+ }
+
+ return pSpecAst;
+ }
+
+ private ASTNode getWindowBound(RexWindowBound wb) {
+ ASTNode wbAST = null;
+
+ if (wb.isCurrentRow()) {
+ wbAST = ASTBuilder.createAST(HiveParser.KW_CURRENT, "CURRENT");
+ } else {
+ if (wb.isPreceding())
+ wbAST = ASTBuilder.createAST(HiveParser.KW_PRECEDING, "PRECEDING");
+ else
+ wbAST = ASTBuilder.createAST(HiveParser.KW_FOLLOWING, "FOLLOWING");
+ if (wb.isUnbounded()) {
+ wbAST.addChild(ASTBuilder.createAST(HiveParser.KW_UNBOUNDED, "UNBOUNDED"));
+ } else {
+ ASTNode offset = wb.getOffset().accept(this);
+ wbAST.addChild(offset);
+ }
+ }
+
+ return wbAST;
+ }
+
+ private ASTNode getWindowRangeAST(RexWindow window) {
+ ASTNode wRangeAst = null;
+
+ ASTNode startAST = null;
+ RexWindowBound ub = window.getUpperBound();
+ if (ub != null) {
+ startAST = getWindowBound(ub);
+ }
+
+ ASTNode endAST = null;
+ RexWindowBound lb = window.getLowerBound();
+ if (lb != null) {
+ endAST = getWindowBound(lb);
+ }
+
+ if (startAST != null || endAST != null) {
+ // NOTE: in the Hive AST, Rows -> Range (physical) & Range -> Values (logical)
+ if (window.isRows())
+ wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWRANGE, "TOK_WINDOWRANGE");
+ else
+ wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWVALUES, "TOK_WINDOWVALUES");
+ if (startAST != null)
+ wRangeAst.addChild(startAST);
+ if (endAST != null)
+ wRangeAst.addChild(endAST);
+ }
+
+ return wRangeAst;
+ }
+
+ @Override
+ public ASTNode visitOver(RexOver over) {
+ if (!deep) {
+ return null;
+ }
+
+ // 1. Translate the UDAF
+ final ASTNode wUDAFAst = visitCall(over);
+
+ // 2. Add TOK_WINDOW as child of UDAF
+ ASTNode wSpec = ASTBuilder.createAST(HiveParser.TOK_WINDOWSPEC, "TOK_WINDOWSPEC");
+ wUDAFAst.addChild(wSpec);
+
+ // 3. Add Part Spec & Range Spec as child of TOK_WINDOW
+ final RexWindow window = over.getWindow();
+ final ASTNode wPSpecAst = getPSpecAST(window);
+ final ASTNode wRangeAst = getWindowRangeAST(window);
+ if (wPSpecAst != null)
+ wSpec.addChild(wPSpecAst);
+ if (wRangeAst != null)
+ wSpec.addChild(wRangeAst);
+
+ return wUDAFAst;
+ }
+
+ @Override
+ public ASTNode visitCall(RexCall call) {
+ if (!deep) {
+ return null;
+ }
+
+ SqlOperator op = call.getOperator();
+ List<ASTNode> astNodeLst = new LinkedList<ASTNode>();
+ if (op.kind == SqlKind.CAST) {
+ HiveToken ht = TypeConverter.hiveToken(call.getType());
+ ASTBuilder astBldr = ASTBuilder.construct(ht.type, ht.text);
+ if (ht.args != null) {
+ for (String castArg : ht.args)
+ astBldr.add(HiveParser.Identifier, castArg);
+ }
+ astNodeLst.add(astBldr.node());
+ }
+
+ for (RexNode operand : call.operands) {
+ astNodeLst.add(operand.accept(this));
+ }
+
+ if (isFlat(call))
+ return SqlFunctionConverter.buildAST(op, astNodeLst, 0);
+ else
+ return SqlFunctionConverter.buildAST(op, astNodeLst);
+ }
+ }
+
+ static class QueryBlockInfo {
+ Schema schema;
+ ASTNode ast;
+
+ public QueryBlockInfo(Schema schema, ASTNode ast) {
+ super();
+ this.schema = schema;
+ this.ast = ast;
+ }
+ }
+
+ /*
+ * represents the schema exposed by a QueryBlock.
+ */
+ static class Schema extends ArrayList<ColumnInfo> {
+
+ private static final long serialVersionUID = 1L;
+
+ Schema(TableAccessRelBase scan) {
+ String tabName = scan.getTable().getQualifiedName().get(0);
+ for (RelDataTypeField field : scan.getRowType().getFieldList()) {
+ add(new ColumnInfo(tabName, field.getName()));
+ }
+ }
+
+ Schema(ProjectRelBase select, String alias) {
+ for (RelDataTypeField field : select.getRowType().getFieldList()) {
+ add(new ColumnInfo(alias, field.getName()));
+ }
+ }
+
+ Schema(UnionRelBase unionRel, String alias) {
+ for (RelDataTypeField field : unionRel.getRowType().getFieldList()) {
+ add(new ColumnInfo(alias, field.getName()));
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ Schema(Schema left, Schema right) {
+ for (ColumnInfo cI : Iterables.concat(left, right)) {
+ add(cI);
+ }
+ }
+
+ Schema(Schema src, AggregateRelBase gBy) {
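+ // The schema of an Aggregate: the group-by columns first (in group-set order),
+ // followed by one synthesized column per aggregate call.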
+ for (int i : BitSets.toIter(gBy.getGroupSet())) {
+ ColumnInfo cI = src.get(i);
+ add(cI);
+ }
+ List<AggregateCall> aggs = gBy.getAggCallList();
+ for (AggregateCall agg : aggs) {
+ int argCount = agg.getArgList().size();
+ ASTBuilder b = agg.isDistinct() ? ASTBuilder.construct(HiveParser.TOK_FUNCTIONDI,
+ "TOK_FUNCTIONDI") : argCount == 0 ? ASTBuilder.construct(HiveParser.TOK_FUNCTIONSTAR,
+ "TOK_FUNCTIONSTAR") : ASTBuilder.construct(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
+ b.add(HiveParser.Identifier, agg.getAggregation().getName());
+ for (int i : agg.getArgList()) {
+ RexInputRef iRef = new RexInputRef(i, new BasicSqlType(SqlTypeName.ANY));
+ b.add(iRef.accept(new RexVisitor(src)));
+ }
+ add(new ColumnInfo(null, b.node()));
+ }
+ }
+
+ /**
+ * Assumption:
+ * 1. ProjectRel will always be the child of SortRel.
+ * 2. In Optiq every projection in ProjectRelBase is uniquely named
+ * (unambiguous) without using a table qualifier (table name).
+ *
+ * @param order
+ * Hive Sort Rel Node
+ * @return Schema
+ */
+ public Schema(HiveSortRel order) {
+ ProjectRelBase select = (ProjectRelBase) order.getChild();
+ for (String projName : select.getRowType().getFieldNames()) {
+ add(new ColumnInfo(null, projName));
+ }
+ }
+ }
+
+ /*
+ * represents Column information exposed by a QueryBlock.
+ */
+ static class ColumnInfo {
+ String table;
+ String column;
+ ASTNode agg;
+
+ ColumnInfo(String table, String column) {
+ super();
+ this.table = table;
+ this.column = column;
+ }
+
+ ColumnInfo(String table, ASTNode agg) {
+ super();
+ this.table = table;
+ this.agg = agg;
+ }
+
+ ColumnInfo(String alias, ColumnInfo srcCol) {
+ this.table = alias;
+ this.column = srcCol.column;
+ this.agg = srcCol.agg;
+ }
+ }
+
+ static String nextAlias() {
+ return String.format("$hdt$_%d", derivedTableCounter.getAndIncrement());
+ }
+
+ private static AtomicLong derivedTableCounter = new AtomicLong(0);
+
+ static class HiveAST {
+
+ ASTNode from;
+ ASTNode where;
+ ASTNode groupBy;
+ ASTNode having;
+ ASTNode select;
+ ASTNode order;
+ ASTNode limit;
+
+ public ASTNode getAST() {
+ ASTBuilder b = ASTBuilder
+ .construct(HiveParser.TOK_QUERY, "TOK_QUERY")
+ .add(from)
+ .add(
+ ASTBuilder.construct(HiveParser.TOK_INSERT, "TOK_INSERT").add(ASTBuilder.destNode())
+ .add(select).add(where).add(groupBy).add(having).add(order).add(limit));
+ return b.node();
+ }
+ }
+
+ public ASTNode getUnionAllAST(ASTNode leftAST, ASTNode rightAST) {
+
+ ASTNode unionTokAST = ASTBuilder.construct(HiveParser.TOK_UNION, "TOK_UNION").add(leftAST)
+ .add(rightAST).node();
+
+ return unionTokAST;
+ }
+
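+ /**
+ * An AND/OR call with more than two operands (e.g. and[p,q,r,s]) is treated
+ * as a flat, n-ary call rather than a nested binary tree.
+ */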
+ public static boolean isFlat(RexCall call) {
+ boolean flat = false;
+ if (call.operands != null && call.operands.size() > 2) {
+ SqlOperator op = call.getOperator();
+ if (op.getKind() == SqlKind.AND || op.getKind() == SqlKind.OR) {
+ flat = true;
+ }
+ }
+
+ return flat;
+ }
+}
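For illustration, a minimal sketch of how the converter is expected to be driven (not part of the patch; optimizedPlan and resultSchema are assumed to come from the Optiq planner and the query's result schema):

    // Hypothetical helper: turn an optimized Optiq plan back into a Hive AST
    // that the existing SemanticAnalyzer can re-analyze.
    static ASTNode toHiveAst(RelNode optimizedPlan, List<FieldSchema> resultSchema) {
      return ASTConverter.convert(optimizedPlan, resultSchema);
    }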
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/DerivedTableInjector.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/DerivedTableInjector.java
new file mode 100644
index 0000000..dd2bf22
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/DerivedTableInjector.java
@@ -0,0 +1,254 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
+import org.eigenbase.rel.AggregateRelBase;
+import org.eigenbase.rel.EmptyRel;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.OneRowRelBase;
+import org.eigenbase.rel.ProjectRelBase;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.SetOpRel;
+import org.eigenbase.rel.SingleRel;
+import org.eigenbase.rel.TableAccessRelBase;
+import org.eigenbase.rel.TableFunctionRelBase;
+import org.eigenbase.rel.UnionRelBase;
+import org.eigenbase.rel.ValuesRelBase;
+import org.eigenbase.rel.rules.MultiJoinRel;
+import org.eigenbase.relopt.hep.HepRelVertex;
+import org.eigenbase.relopt.volcano.RelSubset;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexNode;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+
+public class DerivedTableInjector {
+
+ public static RelNode convertOpTree(RelNode rel, List<FieldSchema> resultSchema) {
+ // Disable introducing top level select since Hive seems to have bugs with
+ // OB, Limit in sub query.
+ // RelNode newTopSelect = introduceTopLevelSelectInResultSchema(rel,
+ // resultSchema);
+ RelNode newTopNode = rel;
+
+ // NOTE: Hive requires a Union to be buried in a Project (TOK_QUERY,
+ // TOK_SUBQUERY, TOK_UNION)
+ if (newTopNode instanceof UnionRelBase) {
+ newTopNode = introduceDerivedTable(newTopNode);
+ }
+
+ convertOpTree(newTopNode, (RelNode) null);
+
+ return newTopNode;
+ }
+
+ private static void convertOpTree(RelNode rel, RelNode parent) {
+
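+ // Walk the plan top-down; whenever a node appears in a position that Hive's
+ // AST cannot express directly (e.g. a Join as the right input of a Join),
+ // wrap it in an identity Project so it is emitted as a derived table.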
+ if (rel instanceof EmptyRel) {
+ throw new RuntimeException("Found Empty Rel");
+ } else if (rel instanceof HepRelVertex) {
+ throw new RuntimeException("Found HepRelVertex");
+ } else if (rel instanceof JoinRelBase) {
+ if (!validJoinParent(rel, parent)) {
+ introduceDerivedTable(rel, parent);
+ }
+ } else if (rel instanceof MultiJoinRel) {
+ throw new RuntimeException("Found MultiJoinRel");
+ } else if (rel instanceof OneRowRelBase) {
+ throw new RuntimeException("Found OneRowRelBase");
+ } else if (rel instanceof RelSubset) {
+ throw new RuntimeException("Found RelSubset");
+ } else if (rel instanceof SetOpRel) {
+ // TODO: Handle more than 2 inputs for setop
+ if (!validSetopParent(rel, parent))
+ introduceDerivedTable(rel, parent);
+
+ SetOpRel setopRel = (SetOpRel) rel;
+ for (RelNode inputRel : setopRel.getInputs()) {
+ if (!validSetopChild(inputRel)) {
+ introduceDerivedTable(inputRel, setopRel);
+ }
+ }
+ } else if (rel instanceof SingleRel) {
+ if (rel instanceof FilterRelBase) {
+ if (!validFilterParent(rel, parent)) {
+ introduceDerivedTable(rel, parent);
+ }
+ } else if (rel instanceof HiveSortRel) {
+ if (!validSortParent(rel, parent)) {
+ introduceDerivedTable(rel, parent);
+ }
+ if (!validSortChild((HiveSortRel) rel)) {
+ introduceDerivedTable(((HiveSortRel) rel).getChild(), rel);
+ }
+ } else if (rel instanceof HiveAggregateRel) {
+ if (!validGBParent(rel, parent)) {
+ introduceDerivedTable(rel, parent);
+ }
+ }
+ } else if (rel instanceof TableAccessRelBase) {
+
+ } else if (rel instanceof TableFunctionRelBase) {
+
+ } else if (rel instanceof ValuesRelBase) {
+
+ }
+
+ List<RelNode> childNodes = rel.getInputs();
+ if (childNodes != null) {
+ for (RelNode r : childNodes) {
+ convertOpTree(r, rel);
+ }
+ }
+ }
+
+ private static HiveProjectRel introduceTopLevelSelectInResultSchema(final RelNode rootRel,
+ List<FieldSchema> resultSchema) {
+ RelNode curNode = rootRel;
+ HiveProjectRel rootProjRel = null;
+ while (curNode != null) {
+ if (curNode instanceof HiveProjectRel) {
+ rootProjRel = (HiveProjectRel) curNode;
+ break;
+ }
+ curNode = curNode.getInput(0);
+ }
+
+ // Assumption: tree could only be (limit)?(OB)?(ProjectRelBase)....
+ List<RexNode> rootChildExps = rootProjRel.getChildExps();
+ if (resultSchema.size() != rootChildExps.size()) {
+ throw new RuntimeException("Result Schema didn't match Optiq Optimized Op Tree Schema");
+ }
+
+ List<RexNode> newSelExps = new ArrayList<RexNode>();
+ List<String> newSelAliases = new ArrayList<String>();
+ for (int i = 0; i < rootChildExps.size(); i++) {
+ newSelExps.add(new RexInputRef(i, rootChildExps.get(i).getType()));
+ newSelAliases.add(resultSchema.get(i).getName());
+ }
+
+ return HiveProjectRel.create(rootRel, newSelExps, newSelAliases);
+ }
+
+ private static RelNode introduceDerivedTable(final RelNode rel) {
+ List<RexNode> projectList = HiveOptiqUtil.getProjsFromBelowAsInputRef(rel);
+
+ HiveProjectRel select = HiveProjectRel.create(rel.getCluster(), rel, projectList,
+ rel.getRowType(), rel.getCollationList());
+
+ return select;
+ }
+
+ private static void introduceDerivedTable(final RelNode rel, RelNode parent) {
+ int i = 0;
+ int pos = -1;
+ List<RelNode> childList = parent.getInputs();
+
+ for (RelNode child : childList) {
+ if (child == rel) {
+ pos = i;
+ break;
+ }
+ i++;
+ }
+
+ if (pos == -1) {
+ throw new RuntimeException("Couldn't find child node in parent's inputs");
+ }
+
+ RelNode select = introduceDerivedTable(rel);
+
+ parent.replaceInput(pos, select);
+ }
+
+ private static boolean validJoinParent(RelNode joinNode, RelNode parent) {
+ boolean validParent = true;
+
+ if (parent instanceof JoinRelBase) {
+ if (((JoinRelBase) parent).getRight() == joinNode) {
+ validParent = false;
+ }
+ } else if (parent instanceof SetOpRel) {
+ validParent = false;
+ }
+
+ return validParent;
+ }
+
+ private static boolean validFilterParent(RelNode filterNode, RelNode parent) {
+ boolean validParent = true;
+
+ // TODO: Verify that a GB having is not a separate filter (if so we shouldn't
+ // introduce a derived table)
+ if (parent instanceof FilterRelBase || parent instanceof JoinRelBase
+ || parent instanceof SetOpRel) {
+ validParent = false;
+ }
+
+ return validParent;
+ }
+
+ private static boolean validGBParent(RelNode gbNode, RelNode parent) {
+ boolean validParent = true;
+
+ // TODO: Verify that a GB having is not a separate filter (if so we shouldn't
+ // introduce a derived table)
+ if (parent instanceof JoinRelBase || parent instanceof SetOpRel
+ || parent instanceof AggregateRelBase
+ || (parent instanceof FilterRelBase && ((AggregateRelBase) gbNode).getGroupSet().isEmpty())) {
+ validParent = false;
+ }
+
+ return validParent;
+ }
+
+ private static boolean validSortParent(RelNode sortNode, RelNode parent) {
+ boolean validParent = true;
+
+ if (parent != null && !(parent instanceof ProjectRelBase)) {
+ validParent = false;
+ }
+
+ return validParent;
+ }
+
+ private static boolean validSortChild(HiveSortRel sortNode) {
+ boolean validChild = true;
+ RelNode child = sortNode.getChild();
+
+ if (!(child instanceof ProjectRelBase)) {
+ validChild = false;
+ }
+
+ return validChild;
+ }
+
+ private static boolean validSetopParent(RelNode setop, RelNode parent) {
+ boolean validChild = true;
+
+ if (parent != null && !(parent instanceof ProjectRelBase)) {
+ validChild = false;
+ }
+
+ return validChild;
+ }
+
+ private static boolean validSetopChild(RelNode setopChild) {
+ boolean validChild = true;
+
+ if (!(setopChild instanceof ProjectRelBase)) {
+ validChild = false;
+ }
+
+ return validChild;
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java
new file mode 100644
index 0000000..5636919
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java
@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexLiteral;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexVisitorImpl;
+
+/*
+ * convert a RexNode to an ExprNodeDesc
+ */
+public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> {
+
+ RelDataType rType;
+ String tabAlias;
+ boolean partitioningExpr;
+
+ public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr) {
+ super(true);
+ /*
+ * hb: 6/25/14 for now we only support expressions that only contain
+ * partition cols. there is no use case for supporting generic expressions.
+ * for supporting generic exprs., we need to give the converter information
+ * on whether a column is a partition column or not, whether a column is a
+ * virtual column or not.
+ */
+ assert partitioningExpr == true;
+ this.tabAlias = tabAlias;
+ this.rType = rType;
+ this.partitioningExpr = partitioningExpr;
+ }
+
+ @Override
+ public ExprNodeDesc visitInputRef(RexInputRef inputRef) {
+ RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex());
+ return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), f.getName(), tabAlias,
+ partitioningExpr);
+ }
+
+ @Override
+ public ExprNodeDesc visitCall(RexCall call) {
+ ExprNodeGenericFuncDesc gfDesc = null;
+
+ if (!deep) {
+ return null;
+ }
+
+ List<ExprNodeDesc> args = new LinkedList<ExprNodeDesc>();
+
+ for (RexNode operand : call.operands) {
+ args.add(operand.accept(this));
+ }
+
+ // If the expr is flat (and[p,q,r,s], or[p,q,r,s]), rebuild it as a left-deep
+ // nest of binary exprnodes.
+ if (ASTConverter.isFlat(call)) {
+ ArrayList<ExprNodeDesc> tmpExprArgs = new ArrayList<ExprNodeDesc>();
+ tmpExprArgs.addAll(args.subList(0, 2));
+ gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()),
+ SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType()), tmpExprArgs);
+ for (int i = 2; i < call.operands.size(); i++) {
+ tmpExprArgs = new ArrayList<ExprNodeDesc>();
+ tmpExprArgs.add(gfDesc);
+ tmpExprArgs.add(args.get(i));
+ gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()),
+ SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType()), tmpExprArgs);
+ }
+ } else {
+ gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()),
+ SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType()), args);
+ }
+
+ return gfDesc;
+ }
+
+ @Override
+ public ExprNodeDesc visitLiteral(RexLiteral literal) {
+ RelDataType lType = literal.getType();
+
+ switch (literal.getType().getSqlTypeName()) {
+ case BOOLEAN:
+ return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral
+ .booleanValue(literal)));
+ case TINYINT:
+ return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal
+ .getValue3()).byteValue()));
+ case SMALLINT:
+ return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo,
+ Short.valueOf(((Number) literal.getValue3()).shortValue()));
+ case INTEGER:
+ return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo,
+ Integer.valueOf(((Number) literal.getValue3()).intValue()));
+ case BIGINT:
+ return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal
+ .getValue3()).longValue()));
+ case FLOAT:
+ return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo,
+ Float.valueOf(((Number) literal.getValue3()).floatValue()));
+ case DOUBLE:
+ return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo,
+ Double.valueOf(((Number) literal.getValue3()).doubleValue()));
+ case DATE:
+ return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, literal.getValue3());
+ case TIMESTAMP:
+ return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, literal.getValue3());
+ case BINARY:
+ return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3());
+ case DECIMAL:
+ return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(),
+ lType.getScale()), literal.getValue3());
+ case VARCHAR:
+ return new ExprNodeConstantDesc(TypeInfoFactory.getVarcharTypeInfo(lType.getPrecision()),
+ new HiveVarchar((String) literal.getValue3(), lType.getPrecision()));
+ case CHAR:
+ return new ExprNodeConstantDesc(TypeInfoFactory.getCharTypeInfo(lType.getPrecision()),
+ new HiveChar((String) literal.getValue3(), lType.getPrecision()));
+ case OTHER:
+ default:
+ return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3());
+ }
+ }
+
+}
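For illustration, a minimal sketch of the expected use of ExprNodeConverter (not part of the patch; partPredicate, tableAlias and scanRowType are assumed inputs):

    // Hypothetical helper: convert a partition-pruning predicate (a RexNode over
    // the table scan's row type) back into Hive's ExprNodeDesc form.
    static ExprNodeDesc toPartitionPruneExpr(RexNode partPredicate, String tableAlias,
        RelDataType scanRowType) {
      return partPredicate.accept(new ExprNodeConverter(tableAlias, scanRowType, true));
    }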
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java
new file mode 100644
index 0000000..c051b65
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java
@@ -0,0 +1,697 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ForwardWalker;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.QBJoinTree;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.eigenbase.rel.AggregateCall;
+import org.eigenbase.rel.Aggregation;
+import org.eigenbase.rel.InvalidRelException;
+import org.eigenbase.rel.JoinRelType;
+import org.eigenbase.rel.RelCollation;
+import org.eigenbase.rel.RelCollationImpl;
+import org.eigenbase.rel.RelFieldCollation;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.TableAccessRelBase;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptSchema;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
+import org.eigenbase.sql.fun.SqlStdOperatorTable;
+import org.eigenbase.util.CompositeList;
+import org.eigenbase.util.Pair;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+
+public class RelNodeConverter {
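+ // Hive aggregate function names mapped to the corresponding Optiq aggregation operators.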
+ private static final Map<String, Aggregation> AGG_MAP = ImmutableMap
+ .<String, Aggregation> builder()
+ .put(
+ "count",
+ SqlStdOperatorTable.COUNT)
+ .put("sum", SqlStdOperatorTable.SUM)
+ .put("min", SqlStdOperatorTable.MIN)
+ .put("max", SqlStdOperatorTable.MAX)
+ .put("avg", SqlStdOperatorTable.AVG)
+ .build();
+
+ public static RelNode convert(Operator<? extends OperatorDesc> sinkOp, RelOptCluster cluster,
+ RelOptSchema schema, SemanticAnalyzer sA, ParseContext pCtx) {
+
+ Context ctx = new Context(cluster, schema, sA, pCtx);
+
+ Map<Rule, NodeProcessor> rules = ImmutableMap
+ .<Rule, NodeProcessor> builder()
+ .put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"),
+ new TableScanProcessor())
+ .put(new RuleRegExp("R2", FilterOperator.getOperatorName() + "%"), new FilterProcessor())
+ .put(new RuleRegExp("R3", SelectOperator.getOperatorName() + "%"), new SelectProcessor())
+ .put(new RuleRegExp("R4", JoinOperator.getOperatorName() + "%"), new JoinProcessor())
+ .put(new RuleRegExp("R5", LimitOperator.getOperatorName() + "%"), new LimitProcessor())
+ .put(new RuleRegExp("R6", GroupByOperator.getOperatorName() + "%"), new GroupByProcessor())
+ .put(new RuleRegExp("R7", ReduceSinkOperator.getOperatorName() + "%"),
+ new ReduceSinkProcessor()).build();
+
+ Dispatcher disp = new DefaultRuleDispatcher(new DefaultProcessor(), rules, ctx);
+ GraphWalker egw = new ForwardWalker(disp);
+
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(pCtx.getTopOps().values());
+
+ HashMap