<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
index 1ba5654..3634ba2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
@@ -32,7 +32,6 @@
*/
public class QueryProperties {
- boolean hasJoin = false;
boolean hasGroupBy = false;
boolean hasOrderBy = false;
boolean hasSortBy = false;
@@ -48,12 +47,28 @@
boolean mapJoinRemoved = false;
boolean hasMapGroupBy = false;
+ private int noOfJoins = 0;
+ private int noOfOuterJoins = 0;
+
+ private boolean multiDestQuery;
+ private boolean filterWithSubQuery;
+
public boolean hasJoin() {
- return hasJoin;
+ return (noOfJoins > 0);
+ }
+
+ public void incrementJoinCount(boolean outerJoin) {
+ noOfJoins++;
+ if (outerJoin)
+ noOfOuterJoins++;
+ }
+
+ public int getJoinCount() {
+ return noOfJoins;
}
- public void setHasJoin(boolean hasJoin) {
- this.hasJoin = hasJoin;
+ public int getOuterJoinCount() {
+ return noOfOuterJoins;
}
public boolean hasGroupBy() {
@@ -143,4 +158,43 @@ public boolean isHasMapGroupBy() {
public void setHasMapGroupBy(boolean hasMapGroupBy) {
this.hasMapGroupBy = hasMapGroupBy;
}
+
+ public boolean hasMultiDestQuery() {
+ return this.multiDestQuery;
+ }
+
+ public void setMultiDestQuery(boolean multiDestQuery) {
+ this.multiDestQuery = multiDestQuery;
+ }
+
+ public void setFilterWithSubQuery(boolean filterWithSubQuery) {
+ this.filterWithSubQuery = filterWithSubQuery;
+ }
+
+ public boolean hasFilterWithSubQuery() {
+ return this.filterWithSubQuery;
+ }
+
+
+ public void clear() {
+ hasGroupBy = false;
+ hasOrderBy = false;
+ hasSortBy = false;
+ hasJoinFollowedByGroupBy = false;
+ hasPTF = false;
+ hasWindowing = false;
+
+ usesScript = false;
+
+ hasDistributeBy = false;
+ hasClusterBy = false;
+ mapJoinRemoved = false;
+ hasMapGroupBy = false;
+
+ noOfJoins = 0;
+ noOfOuterJoins = 0;
+
+ multiDestQuery = false;
+ filterWithSubQuery = false;
+ }
}
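
A minimal, illustrative sketch (not part of the patch) of how a caller might drive the new join counters that replace the old hasJoin flag; the call site is hypothetical, only the QueryProperties methods above come from this change:

    import org.apache.hadoop.hive.ql.QueryProperties;

    public class QueryPropertiesJoinCountExample {
      public static void main(String[] args) {
        QueryProperties qp = new QueryProperties();
        qp.incrementJoinCount(false);                // an inner join was encountered
        qp.incrementJoinCount(true);                 // an outer join was encountered
        System.out.println(qp.hasJoin());            // true once any join has been counted
        System.out.println(qp.getJoinCount());       // 2
        System.out.println(qp.getOuterJoinCount());  // 1
        qp.clear();                                  // reset all flags and counters between queries
        System.out.println(qp.hasJoin());            // false
      }
    }
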
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java
new file mode 100644
index 0000000..2c08772
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java
@@ -0,0 +1,27 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+import com.google.common.collect.ImmutableList;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdDistinctRowCount;
+import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdSelectivity;
+import org.eigenbase.rel.metadata.ChainedRelMetadataProvider;
+import org.eigenbase.rel.metadata.DefaultRelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMetadataProvider;
+
+/**
+ * Distinct row count and selectivity are overridden for Hive.
+ *
+ * Distinct Row Count is overridden for:
+ * 1) Join 2) TableScan.
+ * Selectivity is overridden for:
+ * 1) Join 2) TableScan & Filter.
+ */
+public class HiveDefaultRelMetadataProvider {
+ private HiveDefaultRelMetadataProvider() {
+ }
+
+ public static final RelMetadataProvider INSTANCE = ChainedRelMetadataProvider.of(ImmutableList
+ .of(HiveRelMdDistinctRowCount.SOURCE,
+ HiveRelMdSelectivity.SOURCE,
+ new DefaultRelMetadataProvider()));
+}
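
The chain above is order sensitive: the Hive providers are listed before DefaultRelMetadataProvider, so Hive's distinct-row-count and selectivity handlers shadow the stock ones and everything else falls through to the default. A standalone sketch of that first-match-wins lookup (plain Java with assumed semantics, not the eigenbase API):

    import java.util.Arrays;
    import java.util.List;

    public class ChainedLookupSketch {
      interface Provider { Double distinctRowCount(String relName); }

      static Double lookup(List<Provider> chain, String relName) {
        for (Provider p : chain) {
          Double answer = p.distinctRowCount(relName);
          if (answer != null) {
            return answer;                // first provider that answers wins
          }
        }
        return null;
      }

      public static void main(String[] args) {
        Provider hive = r -> r.startsWith("HiveJoin") ? 1000.0 : null; // handles only Hive joins
        Provider dflt = r -> 42.0;                                     // answers everything
        List<Provider> chain = Arrays.asList(hive, dflt);              // order matters

        System.out.println(lookup(chain, "HiveJoinRel"));  // 1000.0 (Hive override wins)
        System.out.println(lookup(chain, "SortRel"));      // 42.0   (falls through to default)
      }
    }
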
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java
new file mode 100644
index 0000000..764606f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java
@@ -0,0 +1,179 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+
+import org.eigenbase.rel.RelFactories.ProjectFactory;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.sql.fun.SqlStdOperatorTable;
+import org.eigenbase.sql.validate.SqlValidatorUtil;
+import org.eigenbase.util.Pair;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+
+/**
+ * Generic utility functions needed for Optiq based Hive CBO.
+ */
+
+public class HiveOptiqUtil {
+
+ public static List<RexNode> getProjsFromBelowAsInputRef(final RelNode rel) {
+ List<RexNode> projectList = Lists.transform(rel.getRowType().getFieldList(),
+ new Function<RelDataTypeField, RexNode>() {
+ public RexNode apply(RelDataTypeField field) {
+ return rel.getCluster().getRexBuilder().makeInputRef(field.getType(), field.getIndex());
+ }
+ });
+ return projectList;
+ }
+
+ public static List<Integer> translateBitSetToProjIndx(BitSet projBitSet) {
+ List<Integer> projIndxLst = new ArrayList<Integer>();
+
+ for (int i = 0; i < projBitSet.length(); i++) {
+ if (projBitSet.get(i)) {
+ projIndxLst.add(i);
+ }
+ }
+
+ return projIndxLst;
+ }
+
+ @Deprecated
+ public static void todo(String s) {
+ }
+
+ /**
+ * Push any equi join conditions that are not column references as Projections
+ * on top of the children.
+ *
+ * @param factory
+ * Project factory to use.
+ * @param inputRels
+ * inputs to a join
+ * @param leftJoinKeys
+ * expressions for LHS of join key
+ * @param rightJoinKeys
+ * expressions for RHS of join key
+ * @param systemColCount
+ * number of system columns, usually zero. These columns are
+ * projected at the leading edge of the output row.
+ * @param leftKeys on return this contains the join key positions from
+ * the new project rel on the LHS.
+ * @param rightKeys on return this contains the join key positions from
+ * the new project rel on the RHS.
+ * @return the join condition after the equi expressions pushed down.
+ */
+ public static RexNode projectNonColumnEquiConditions(ProjectFactory factory,
+ RelNode[] inputRels, List<RexNode> leftJoinKeys,
+ List<RexNode> rightJoinKeys, int systemColCount, List<Integer> leftKeys,
+ List<Integer> rightKeys) {
+ RelNode leftRel = inputRels[0];
+ RelNode rightRel = inputRels[1];
+ RexBuilder rexBuilder = leftRel.getCluster().getRexBuilder();
+ RexNode outJoinCond = null;
+
+ int origLeftInputSize = leftRel.getRowType().getFieldCount();
+ int origRightInputSize = rightRel.getRowType().getFieldCount();
+
+ List<RexNode> newLeftFields = new ArrayList<RexNode>();
+ List<String> newLeftFieldNames = new ArrayList<String>();
+
+ List<RexNode> newRightFields = new ArrayList<RexNode>();
+ List<String> newRightFieldNames = new ArrayList<String>();
+ int leftKeyCount = leftJoinKeys.size();
+ int i;
+
+ for (i = 0; i < origLeftInputSize; i++) {
+ final RelDataTypeField field = leftRel.getRowType().getFieldList().get(i);
+ newLeftFields.add(rexBuilder.makeInputRef(field.getType(), i));
+ newLeftFieldNames.add(field.getName());
+ }
+
+ for (i = 0; i < origRightInputSize; i++) {
+ final RelDataTypeField field = rightRel.getRowType().getFieldList()
+ .get(i);
+ newRightFields.add(rexBuilder.makeInputRef(field.getType(), i));
+ newRightFieldNames.add(field.getName());
+ }
+
+ int newKeyCount = 0;
+ List<Pair<Integer, Integer>> origColEqConds = new ArrayList<Pair<Integer, Integer>>();
+ for (i = 0; i < leftKeyCount; i++) {
+ RexNode leftKey = leftJoinKeys.get(i);
+ RexNode rightKey = rightJoinKeys.get(i);
+
+ if (leftKey instanceof RexInputRef && rightKey instanceof RexInputRef) {
+ origColEqConds.add(Pair.of(((RexInputRef) leftKey).getIndex(),
+ ((RexInputRef) rightKey).getIndex()));
+ } else {
+ newLeftFields.add(leftKey);
+ newLeftFieldNames.add(null);
+ newRightFields.add(rightKey);
+ newRightFieldNames.add(null);
+ newKeyCount++;
+ }
+ }
+
+ for (i = 0; i < origColEqConds.size(); i++) {
+ Pair<Integer, Integer> p = origColEqConds.get(i);
+ RexNode leftKey = leftJoinKeys.get(i);
+ RexNode rightKey = rightJoinKeys.get(i);
+ leftKeys.add(p.left);
+ rightKeys.add(p.right);
+ RexNode cond = rexBuilder.makeCall(
+ SqlStdOperatorTable.EQUALS,
+ rexBuilder.makeInputRef(leftKey.getType(), systemColCount + p.left),
+ rexBuilder.makeInputRef(rightKey.getType(), systemColCount
+ + origLeftInputSize + newKeyCount + p.right));
+ if (outJoinCond == null) {
+ outJoinCond = cond;
+ } else {
+ outJoinCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, outJoinCond,
+ cond);
+ }
+ }
+
+ if (newKeyCount == 0) {
+ return outJoinCond;
+ }
+
+ int newLeftOffset = systemColCount + origLeftInputSize;
+ int newRightOffset = systemColCount + origLeftInputSize
+ + origRightInputSize + newKeyCount;
+ for (i = 0; i < newKeyCount; i++) {
+ leftKeys.add(origLeftInputSize + i);
+ rightKeys.add(origRightInputSize + i);
+ RexNode cond = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, rexBuilder
+ .makeInputRef(newLeftFields.get(i).getType(), newLeftOffset + i),
+ rexBuilder.makeInputRef(newLeftFields.get(i).getType(),
+ newRightOffset + i));
+ if (outJoinCond == null) {
+ outJoinCond = cond;
+ } else {
+ outJoinCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, outJoinCond,
+ cond);
+ }
+ }
+
+ // add a project if we need to produce new keys beyond the original input
+ // fields
+ if (newKeyCount > 0) {
+ leftRel = factory.createProject(leftRel, newLeftFields,
+ SqlValidatorUtil.uniquify(newLeftFieldNames));
+ rightRel = factory.createProject(rightRel, newRightFields,
+ SqlValidatorUtil.uniquify(newRightFieldNames));
+ }
+
+ inputRels[0] = leftRel;
+ inputRels[1] = rightRel;
+
+ return outJoinCond;
+ }
+}
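
A worked example (hypothetical emp/dept schema) of the index arithmetic performed by projectNonColumnEquiConditions above: with left input emp(empno, deptno_str), right input dept(deptno), no system columns, and the condition CAST(deptno_str AS INT) = deptno, the cast is not a plain column reference, so both key expressions are appended to projects on top of the children and the join condition is rewritten against the new positions:

    public class EquiKeyOffsetSketch {
      public static void main(String[] args) {
        int systemColCount = 0;
        int origLeftInputSize = 2;   // emp(empno, deptno_str)
        int origRightInputSize = 1;  // dept(deptno)
        int newKeyCount = 1;         // one non-column equi key pair was pushed down

        // Key positions within each child's new project output (returned via leftKeys/rightKeys):
        int leftKeyPos = origLeftInputSize;    // 2 -> [empno, deptno_str, CAST(deptno_str)]
        int rightKeyPos = origRightInputSize;  // 1 -> [deptno, deptno]

        // Positions the rewritten join condition references in the join schema:
        int newLeftOffset = systemColCount + origLeftInputSize;
        int newRightOffset = systemColCount + origLeftInputSize + origRightInputSize + newKeyCount;
        System.out.println("leftKeys=[" + leftKeyPos + "], rightKeys=[" + rightKeyPos + "]");
        System.out.println("join condition: =($" + newLeftOffset + ", $" + newRightOffset + ")");
        // prints: leftKeys=[2], rightKeys=[1] and join condition: =($2, $4)
      }
    }
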
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/JoinUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/JoinUtil.java
new file mode 100644
index 0000000..da77d36
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/JoinUtil.java
@@ -0,0 +1,295 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.relopt.RelOptUtil.InputReferencedVisitor;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.sql.SqlKind;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+
+/**
+ * Utility for inspecting Join Conditions.
+ *
+ * Main Elements:
+ * 1. JoinPredicateInfo - represents Join Condition.
+ * 2. JoinLeafPredicateInfo - represents the leaf predicates within the join condition.
+ *
+ * TODO: Move this to Optiq Framework
+ */
+public class JoinUtil {
+
+ /**
+ * JoinPredicateInfo represents a Join condition; JoinPredicateInfo uses
+ * JoinLeafPredicateInfo to represent the individual conjunctive elements in the
+ * predicate.
+ * JoinPredicateInfo = JoinLeafPredicateInfo1 and JoinLeafPredicateInfo2...
+ *
+ * JoinPredicateInfo:
+ * 1. Preserves the order of conjunctive elements for
+ * equi-joins (m_equiJoinPredicateElements).
+ * 2. Stores the sets of projection indexes from the left and right children that are part
+ * of the equi-join keys; the indexes are kept in both the child and the Join node schema.
+ * 3. Keeps a map from the projection indexes that are part of join keys to the list of
+ * conjunctive elements (JoinLeafPredicateInfo) that use them.
+ *
+ */
+ public static class JoinPredicateInfo {
+ private final ImmutableList<JoinLeafPredicateInfo> m_nonEquiJoinPredicateElements;
+ private final ImmutableList<JoinLeafPredicateInfo> m_equiJoinPredicateElements;
+ private final ImmutableSet<Integer> m_projsFromLeftPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> m_projsFromRightPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> m_projsFromRightPartOfJoinKeysInJoinSchema;
+ private final ImmutableMap<Integer, ImmutableList<JoinLeafPredicateInfo>> m_mapOfProjIndxInJoinSchemaToLeafPInfo;
+
+ public JoinPredicateInfo(List<JoinLeafPredicateInfo> nonEquiJoinPredicateElements,
+ List<JoinLeafPredicateInfo> equiJoinPredicateElements,
+ Set<Integer> projsFromLeftPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema,
+ Map<Integer, ImmutableList<JoinLeafPredicateInfo>> mapOfProjIndxInJoinSchemaToLeafPInfo) {
+ m_nonEquiJoinPredicateElements = ImmutableList.copyOf(nonEquiJoinPredicateElements);
+ m_equiJoinPredicateElements = ImmutableList.copyOf(equiJoinPredicateElements);
+ m_projsFromLeftPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromLeftPartOfJoinKeysInChildSchema);
+ m_projsFromRightPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInChildSchema);
+ m_projsFromRightPartOfJoinKeysInJoinSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInJoinSchema);
+ m_mapOfProjIndxInJoinSchemaToLeafPInfo = ImmutableMap
+ .copyOf(mapOfProjIndxInJoinSchemaToLeafPInfo);
+ }
+
+ public List<JoinLeafPredicateInfo> getNonEquiJoinPredicateElements() {
+ return m_nonEquiJoinPredicateElements;
+ }
+
+ public List<JoinLeafPredicateInfo> getEquiJoinPredicateElements() {
+ return m_equiJoinPredicateElements;
+ }
+
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInChildSchema() {
+ return m_projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInChildSchema() {
+ return m_projsFromRightPartOfJoinKeysInChildSchema;
+ }
+
+ /**
+ * NOTE: Join Schema = left Schema + (right Schema offset by
+ * left.fieldcount). Hence it is OK to return projections from the left in child
+ * schema.
+ */
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInJoinSchema() {
+ return m_projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInJoinSchema() {
+ return m_projsFromRightPartOfJoinKeysInJoinSchema;
+ }
+
+ public Map<Integer, ImmutableList<JoinLeafPredicateInfo>> getMapOfProjIndxToLeafPInfo() {
+ return m_mapOfProjIndxInJoinSchemaToLeafPInfo;
+ }
+
+ public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoinRel j) {
+ return constructJoinPredicateInfo(j, j.getCondition());
+ }
+
+ public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoinRel j, RexNode predicate) {
+ JoinPredicateInfo jpi = null;
+ JoinLeafPredicateInfo jlpi = null;
+ List<JoinLeafPredicateInfo> equiLPIList = new ArrayList<JoinLeafPredicateInfo>();
+ List<JoinLeafPredicateInfo> nonEquiLPIList = new ArrayList<JoinLeafPredicateInfo>();
+ Set<Integer> projsFromLeftPartOfJoinKeys = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeys = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema = new HashSet<Integer>();
+ Map<Integer, List<JoinLeafPredicateInfo>> tmpMapOfProjIndxInJoinSchemaToLeafPInfo = new HashMap<Integer, List<JoinLeafPredicateInfo>>();
+ Map<Integer, ImmutableList<JoinLeafPredicateInfo>> mapOfProjIndxInJoinSchemaToLeafPInfo = new HashMap<Integer, ImmutableList<JoinLeafPredicateInfo>>();
+ List<JoinLeafPredicateInfo> tmpJLPILst = null;
+ int rightOffSet = j.getLeft().getRowType().getFieldCount();
+ int projIndxInJoin;
+ List<RexNode> conjuctiveElements;
+
+ todo("Move this to Optiq");
+
+ // 1. Decompose Join condition to a number of leaf predicates
+ // (conjuctive elements)
+ conjuctiveElements = RelOptUtil.conjunctions(predicate);
+
+ // 2. Walk through leaf predicates building up JoinLeafPredicateInfo
+ for (RexNode ce : conjuctiveElements) {
+ // 2.1 Construct JoinLeafPredicateInfo
+ jlpi = JoinLeafPredicateInfo.constructJoinLeafPredicateInfo(j, ce);
+
+ // 2.2 Classify leaf predicate as Equi vs Non Equi
+ if (jlpi.m_comparisonType.equals(SqlKind.EQUALS)) {
+ equiLPIList.add(jlpi);
+ } else {
+ nonEquiLPIList.add(jlpi);
+ }
+
+ // 2.3 Maintain join keys coming from left vs right (in child &
+ // Join Schema)
+ projsFromLeftPartOfJoinKeys.addAll(jlpi.getProjsFromLeftPartOfJoinKeysInChildSchema());
+ projsFromRightPartOfJoinKeys.addAll(jlpi.getProjsFromRightPartOfJoinKeysInChildSchema());
+ projsFromRightPartOfJoinKeysInJoinSchema.addAll(jlpi
+ .getProjsFromRightPartOfJoinKeysInJoinSchema());
+
+ // 2.4 Update Join Key to JoinLeafPredicateInfo map with keys
+ // from left
+ for (Integer projIndx : jlpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
+ tmpJLPILst = tmpMapOfProjIndxInJoinSchemaToLeafPInfo.get(projIndx);
+ if (tmpJLPILst == null)
+ tmpJLPILst = new ArrayList<JoinLeafPredicateInfo>();
+ tmpJLPILst.add(jlpi);
+ tmpMapOfProjIndxInJoinSchemaToLeafPInfo.put(projIndx, tmpJLPILst);
+ }
+
+ // 2.5 Update Join Key to JoinLeafPredicateInfo map with keys
+ // from right
+ for (Integer projIndx : jlpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
+ projIndxInJoin = projIndx + rightOffSet;
+ tmpJLPILst = tmpMapOfProjIndxInJoinSchemaToLeafPInfo.get(projIndxInJoin);
+ if (tmpJLPILst == null)
+ tmpJLPILst = new ArrayList<JoinLeafPredicateInfo>();
+ tmpJLPILst.add(jlpi);
+ tmpMapOfProjIndxInJoinSchemaToLeafPInfo.put(projIndxInJoin, tmpJLPILst);
+ }
+
+ }
+
+ // 3. Update the Join Key to List<JoinLeafPredicateInfo> map to use
+ // ImmutableList
+ for (Entry<Integer, List<JoinLeafPredicateInfo>> e : tmpMapOfProjIndxInJoinSchemaToLeafPInfo
+ .entrySet()) {
+ mapOfProjIndxInJoinSchemaToLeafPInfo.put(e.getKey(), ImmutableList.copyOf(e.getValue()));
+ }
+
+ // 4. Construct JoinPredicateInfo
+ jpi = new JoinPredicateInfo(nonEquiLPIList, equiLPIList, projsFromLeftPartOfJoinKeys,
+ projsFromRightPartOfJoinKeys, projsFromRightPartOfJoinKeysInJoinSchema,
+ mapOfProjIndxInJoinSchemaToLeafPInfo);
+ return jpi;
+ }
+ }
+
+ /**
+ * JoinLeafPredicateInfo represents a leaf predicate in a Join condition
+ * (a conjunctive element).
+ *
+ * JoinLeafPredicateInfo:
+ * 1. Stores the lists of expressions from the left and right children that are part of
+ * the equi-join keys.
+ * 2. Stores the sets of projection indexes from the left and right children that are part
+ * of the equi-join keys; the indexes are kept in both the child and the Join node schema.
+ */
+ public static class JoinLeafPredicateInfo {
+ private final SqlKind m_comparisonType;
+ private final ImmutableList<RexNode> m_joinKeyExprsFromLeft;
+ private final ImmutableList<RexNode> m_joinKeyExprsFromRight;
+ private final ImmutableSet<Integer> m_projsFromLeftPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> m_projsFromRightPartOfJoinKeysInChildSchema;
+ private final ImmutableSet<Integer> m_projsFromRightPartOfJoinKeysInJoinSchema;
+
+ public JoinLeafPredicateInfo(SqlKind comparisonType, List<RexNode> joinKeyExprsFromLeft,
+ List<RexNode> joinKeyExprsFromRight, Set<Integer> projsFromLeftPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInChildSchema,
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema) {
+ m_comparisonType = comparisonType;
+ m_joinKeyExprsFromLeft = ImmutableList.copyOf(joinKeyExprsFromLeft);
+ m_joinKeyExprsFromRight = ImmutableList.copyOf(joinKeyExprsFromRight);
+ m_projsFromLeftPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromLeftPartOfJoinKeysInChildSchema);
+ m_projsFromRightPartOfJoinKeysInChildSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInChildSchema);
+ m_projsFromRightPartOfJoinKeysInJoinSchema = ImmutableSet
+ .copyOf(projsFromRightPartOfJoinKeysInJoinSchema);
+ }
+
+ public List<RexNode> getJoinKeyExprsFromLeft() {
+ return m_joinKeyExprsFromLeft;
+ }
+
+ public List<RexNode> getJoinKeyExprsFromRight() {
+ return m_joinKeyExprsFromRight;
+ }
+
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInChildSchema() {
+ return m_projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ /**
+ * NOTE: Join Schema = left Schema + (right Schema offset by
+ * left.fieldcount). Hence it is OK to return projections from the left in child
+ * schema.
+ */
+ public Set<Integer> getProjsFromLeftPartOfJoinKeysInJoinSchema() {
+ return m_projsFromLeftPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInChildSchema() {
+ return m_projsFromRightPartOfJoinKeysInChildSchema;
+ }
+
+ public Set<Integer> getProjsFromRightPartOfJoinKeysInJoinSchema() {
+ return m_projsFromRightPartOfJoinKeysInJoinSchema;
+ }
+
+ public static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(HiveJoinRel j, RexNode pe) {
+ JoinLeafPredicateInfo jlpi = null;
+ List<Integer> filterNulls = new ArrayList<Integer>();
+ List<RexNode> joinKeyExprsFromLeft = new ArrayList<RexNode>();
+ List<RexNode> joinKeyExprsFromRight = new ArrayList<RexNode>();
+ Set<Integer> projsFromLeftPartOfJoinKeysInChildSchema = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeysInChildSchema = new HashSet<Integer>();
+ Set<Integer> projsFromRightPartOfJoinKeysInJoinSchema = new HashSet<Integer>();
+ int rightOffSet = j.getLeft().getRowType().getFieldCount();
+
+ todo("Move this to Optiq");
+
+ // 1. Split leaf join predicate to expressions from left, right
+ @SuppressWarnings("unused")
+ RexNode nonEquiPredicate = RelOptUtil.splitJoinCondition(j.getSystemFieldList(), j.getLeft(),
+ j.getRight(), pe, joinKeyExprsFromLeft, joinKeyExprsFromRight, filterNulls, null);
+
+ // 2. For left expressions, collect child projection indexes used
+ InputReferencedVisitor irvLeft = new InputReferencedVisitor();
+ irvLeft.apply(joinKeyExprsFromLeft);
+ projsFromLeftPartOfJoinKeysInChildSchema.addAll(irvLeft.inputPosReferenced);
+
+ // 3. For right expressions, collect child projection indexes used
+ InputReferencedVisitor irvRight = new InputReferencedVisitor();
+ irvRight.apply(joinKeyExprsFromRight);
+ projsFromRightPartOfJoinKeysInChildSchema.addAll(irvRight.inputPosReferenced);
+
+ // 4. Translate the projection indexes from the right child to the join schema by adding
+ // the offset.
+ for (Integer indx : projsFromRightPartOfJoinKeysInChildSchema) {
+ projsFromRightPartOfJoinKeysInJoinSchema.add(indx + rightOffSet);
+ }
+
+ // 5. Construct JoinLeafPredicateInfo
+ jlpi = new JoinLeafPredicateInfo(pe.getKind(), joinKeyExprsFromLeft, joinKeyExprsFromRight,
+ projsFromLeftPartOfJoinKeysInChildSchema, projsFromRightPartOfJoinKeysInChildSchema,
+ projsFromRightPartOfJoinKeysInJoinSchema);
+
+ return jlpi;
+ }
+ }
+
+ @Deprecated
+ public static void todo(String s) {
+ }
+}
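
A small standalone sketch of the schema translation in step 4 above: right-side join-key indexes are collected in the right child's schema and then shifted by the left child's field count to obtain their positions in the combined join schema (the emp/dept shapes are hypothetical):

    import java.util.HashSet;
    import java.util.Set;

    public class JoinSchemaOffsetSketch {
      public static void main(String[] args) {
        // Hypothetical join: emp (4 columns) JOIN dept ON emp.deptno = dept.deptno
        int rightOffSet = 4;                                  // left child's field count
        Set<Integer> rightKeysInChildSchema = new HashSet<Integer>();
        rightKeysInChildSchema.add(0);                        // dept.deptno is column 0 of the right child

        Set<Integer> rightKeysInJoinSchema = new HashSet<Integer>();
        for (Integer indx : rightKeysInChildSchema) {
          rightKeysInJoinSchema.add(indx + rightOffSet);      // becomes column 4 of the join row
        }
        System.out.println(rightKeysInJoinSchema);            // [4]
      }
    }
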
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
new file mode 100644
index 0000000..6df8438
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
@@ -0,0 +1,318 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ExprNodeConverter;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.TableAccessRel;
+import org.eigenbase.relopt.RelOptAbstractTable;
+import org.eigenbase.relopt.RelOptSchema;
+import org.eigenbase.relopt.RelOptUtil.InputFinder;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.rex.RexNode;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMap.Builder;
+
+public class RelOptHiveTable extends RelOptAbstractTable {
+ private final Table m_hiveTblMetadata;
+ private final ImmutableList<ColumnInfo> m_hiveNonPartitionCols;
+ private final ImmutableMap<Integer, ColumnInfo> m_hiveNonPartitionColsMap;
+ private final ImmutableMap<Integer, ColumnInfo> m_hivePartitionColsMap;
+ private final int m_noOfProjs;
+ final HiveConf m_hiveConf;
+
+ private double m_rowCount = -1;
+ Map<Integer, ColStatistics> m_hiveColStatsMap = new HashMap<Integer, ColStatistics>();
+ PrunedPartitionList partitionList;
+ Map<String, PrunedPartitionList> partitionCache;
+ AtomicInteger noColsMissingStats;
+
+ protected static final Log LOG = LogFactory
+ .getLog(RelOptHiveTable.class
+ .getName());
+
+ public RelOptHiveTable(RelOptSchema optiqSchema, String name, RelDataType rowType,
+ Table hiveTblMetadata, List<ColumnInfo> hiveNonPartitionCols,
+ List<ColumnInfo> hivePartitionCols, HiveConf hconf, Map<String, PrunedPartitionList> partitionCache, AtomicInteger noColsMissingStats) {
+ super(optiqSchema, name, rowType);
+ m_hiveTblMetadata = hiveTblMetadata;
+ m_hiveNonPartitionCols = ImmutableList.copyOf(hiveNonPartitionCols);
+ m_hiveNonPartitionColsMap = getColInfoMap(hiveNonPartitionCols, 0);
+ m_hivePartitionColsMap = getColInfoMap(hivePartitionCols, m_hiveNonPartitionColsMap.size());
+ m_noOfProjs = hiveNonPartitionCols.size() + hivePartitionCols.size();
+ m_hiveConf = hconf;
+ this.partitionCache = partitionCache;
+ this.noColsMissingStats = noColsMissingStats;
+ }
+
+ private static ImmutableMap<Integer, ColumnInfo> getColInfoMap(List<ColumnInfo> hiveCols,
+ int startIndx) {
+ Builder<Integer, ColumnInfo> bldr = ImmutableMap.<Integer, ColumnInfo> builder();
+
+ int indx = startIndx;
+ for (ColumnInfo ci : hiveCols) {
+ bldr.put(indx, ci);
+ indx++;
+ }
+
+ return bldr.build();
+ }
+
+ @Override
+ public boolean isKey(BitSet arg0) {
+ return false;
+ }
+
+ @Override
+ public RelNode toRel(ToRelContext context) {
+ return new TableAccessRel(context.getCluster(), this);
+ }
+
+ @Override
+ public <T> T unwrap(Class<T> arg0) {
+ return arg0.isInstance(this) ? arg0.cast(this) : null;
+ }
+
+ @Override
+ public double getRowCount() {
+ if (m_rowCount == -1) {
+ if (null == partitionList) {
+ // We are here for either an unpartitioned table or a partitioned table with no predicates.
+ computePartitionList(m_hiveConf, null);
+ }
+ if (m_hiveTblMetadata.isPartitioned()) {
+ List<Long> rowCounts = StatsUtils.getBasicStatForPartitions(
+ m_hiveTblMetadata, partitionList.getNotDeniedPartns(),
+ StatsSetupConst.ROW_COUNT);
+ m_rowCount = StatsUtils.getSumIgnoreNegatives(rowCounts);
+
+ } else {
+ m_rowCount = StatsUtils.getNumRows(m_hiveTblMetadata);
+ }
+ }
+
+ return m_rowCount;
+ }
+
+ public Table getHiveTableMD() {
+ return m_hiveTblMetadata;
+ }
+
+ private String getColNamesForLogging(Set<String> colLst) {
+ StringBuffer sb = new StringBuffer();
+ boolean firstEntry = true;
+ for (String colName : colLst) {
+ if (firstEntry) {
+ sb.append(colName);
+ firstEntry = false;
+ } else {
+ sb.append(", " + colName);
+ }
+ }
+ return sb.toString();
+ }
+
+ public void computePartitionList(HiveConf conf, RexNode pruneNode) {
+
+ try {
+ if (!m_hiveTblMetadata.isPartitioned() || pruneNode == null || InputFinder.bits(pruneNode).length() == 0 ) {
+ // There is no predicate on a partitioning column, so we need all partitions in this case.
+ partitionList = PartitionPruner.prune(m_hiveTblMetadata, null, conf, getName(), partitionCache);
+ return;
+ }
+
+ // We have valid pruning expressions, only retrieve qualifying partitions
+ ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true));
+
+ partitionList = PartitionPruner.prune(m_hiveTblMetadata, pruneExpr, conf, getName(), partitionCache);
+ } catch (HiveException he) {
+ throw new RuntimeException(he);
+ }
+ }
+
+ private void updateColStats(Set projIndxLst) {
+ List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
+ List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
+ List<String> partColNamesThatRqrStats = new ArrayList<String>();
+ List<Integer> partColIndxsThatRqrStats = new ArrayList<Integer>();
+ Set<String> colNamesFailedStats = new HashSet<String>();
+
+ // 1. Separate required columns to Non Partition and Partition Cols
+ ColumnInfo tmp;
+ for (Integer pi : projIndxLst) {
+ if (m_hiveColStatsMap.get(pi) == null) {
+ if ((tmp = m_hiveNonPartitionColsMap.get(pi)) != null) {
+ nonPartColNamesThatRqrStats.add(tmp.getInternalName());
+ nonPartColIndxsThatRqrStats.add(pi);
+ } else if ((tmp = m_hivePartitionColsMap.get(pi)) != null) {
+ partColNamesThatRqrStats.add(tmp.getInternalName());
+ partColIndxsThatRqrStats.add(pi);
+ } else {
+ String logMsg = "Unable to find Column Index: " + pi + ", in "
+ + m_hiveTblMetadata.getCompleteName();
+ LOG.error(logMsg);
+ throw new RuntimeException(logMsg);
+ }
+ }
+ }
+
+ if (null == partitionList) {
+ // We could be here either because it is an unpartitioned table or because
+ // there are no pruning predicates on a partitioned table.
+ computePartitionList(m_hiveConf, null);
+ }
+
+ // 2. Obtain Col Stats for Non Partition Cols
+ if (nonPartColNamesThatRqrStats.size() > 0) {
+ List<ColStatistics> hiveColStats;
+
+ if (!m_hiveTblMetadata.isPartitioned()) {
+ // 2.1 Handle the case for unpartitioned table.
+ hiveColStats = StatsUtils.getTableColumnStats(m_hiveTblMetadata, m_hiveNonPartitionCols,
+ nonPartColNamesThatRqrStats);
+
+ // 2.1.1 Record Column Names that we needed stats for but couldn't
+ if (hiveColStats == null) {
+ colNamesFailedStats.addAll(nonPartColNamesThatRqrStats);
+ } else if (hiveColStats.size() != nonPartColNamesThatRqrStats.size()) {
+ Set<String> setOfFiledCols = new HashSet<String>(nonPartColNamesThatRqrStats);
+
+ Set<String> setOfObtainedColStats = new HashSet<String>();
+ for (ColStatistics cs : hiveColStats) {
+ setOfObtainedColStats.add(cs.getColumnName());
+ }
+ setOfFiledCols.removeAll(setOfObtainedColStats);
+
+ colNamesFailedStats.addAll(setOfFiledCols);
+ }
+ } else {
+ // 2.2 Obtain col stats for partitioned table.
+ try {
+ if (partitionList.getNotDeniedPartns().isEmpty()) {
+ // no need to make a metastore call
+ m_rowCount = 0;
+ hiveColStats = new ArrayList<ColStatistics>();
+ for (String c : nonPartColNamesThatRqrStats) {
+ // add empty stats object for each column
+ hiveColStats.add(new ColStatistics(m_hiveTblMetadata.getTableName(), c, null));
+ }
+ colNamesFailedStats.clear();
+ } else {
+ Statistics stats = StatsUtils.collectStatistics(m_hiveConf, partitionList,
+ m_hiveTblMetadata, m_hiveNonPartitionCols, nonPartColNamesThatRqrStats, true, true);
+ m_rowCount = stats.getNumRows();
+ hiveColStats = new ArrayList<ColStatistics>();
+ for (String c : nonPartColNamesThatRqrStats) {
+ ColStatistics cs = stats.getColumnStatisticsFromColName(c);
+ if (cs != null) {
+ hiveColStats.add(cs);
+ } else {
+ colNamesFailedStats.add(c);
+ }
+ }
+ }
+ } catch (HiveException e) {
+ String logMsg = "Collecting stats failed.";
+ LOG.error(logMsg);
+ throw new RuntimeException(logMsg);
+ }
+ }
+
+ if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) {
+ for (int i = 0; i < hiveColStats.size(); i++) {
+ m_hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), hiveColStats.get(i));
+ }
+ }
+ }
+
+ // 3. Obtain Stats for Partition Cols
+ if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) {
+ ColStatistics cStats = null;
+ for (int i = 0; i < partColNamesThatRqrStats.size(); i++) {
+ cStats = new ColStatistics(m_hiveTblMetadata.getTableName(),
+ partColNamesThatRqrStats.get(i), m_hivePartitionColsMap.get(
+ partColIndxsThatRqrStats.get(i)).getTypeName());
+ cStats.setCountDistint(getDistinctCount(partitionList.getPartitions(),partColNamesThatRqrStats.get(i)));
+ m_hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats);
+ }
+ }
+
+ // 4. Warn the user if we could not get stats for the required columns
+ if (!colNamesFailedStats.isEmpty()) {
+ String logMsg = "No Stats for " + m_hiveTblMetadata.getCompleteName() + ", Columns: "
+ + getColNamesForLogging(colNamesFailedStats);
+ LOG.error(logMsg);
+ noColsMissingStats.getAndAdd(colNamesFailedStats.size());
+ throw new RuntimeException(logMsg);
+ }
+ }
+
+ private int getDistinctCount(Set<Partition> partitions, String partColName) {
+ Set<String> distinctVals = new HashSet<String>(partitions.size());
+ for (Partition partition : partitions) {
+ distinctVals.add(partition.getSpec().get(partColName));
+ }
+ return distinctVals.size();
+ }
+
+ public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
+ ImmutableList.Builder<ColStatistics> colStatsBldr = ImmutableList.<ColStatistics> builder();
+
+ if (projIndxLst != null) {
+ updateColStats(new HashSet<Integer>(projIndxLst));
+ for (Integer i : projIndxLst) {
+ colStatsBldr.add(m_hiveColStatsMap.get(i));
+ }
+ } else {
+ List<Integer> pILst = new ArrayList<Integer>();
+ for (Integer i = 0; i < m_noOfProjs; i++) {
+ pILst.add(i);
+ }
+ updateColStats(new HashSet<Integer>(pILst));
+ for (Integer pi : pILst) {
+ colStatsBldr.add(m_hiveColStatsMap.get(pi));
+ }
+ }
+
+ return colStatsBldr.build();
+ }
+
+ /*
+ * Check whether a set of columns consists only of partition columns:
+ * returns true only if every column in the BitSet is a partition column.
+ */
+ public boolean containsPartitionColumnsOnly(BitSet cols) {
+
+ for (int i = cols.nextSetBit(0); i >= 0; i++, i = cols.nextSetBit(i + 1)) {
+ if (!m_hivePartitionColsMap.containsKey(i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+}
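
getDistinctCount above estimates the NDV of a partition column without any metastore column statistics: it simply counts the distinct values the column takes across the partitions that survived pruning. A standalone sketch with hypothetical partition specs:

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class PartitionColumnNdvSketch {
      public static void main(String[] args) {
        // Values of partition column "ds" taken from the selected partitions' specs.
        List<String> dsValues = Arrays.asList("2014-01-01", "2014-01-02", "2014-01-02");
        Set<String> distinctVals = new HashSet<String>(dsValues);
        System.out.println(distinctVals.size());  // 2 -> used as the column's distinct count
      }
    }
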
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java
new file mode 100644
index 0000000..e8069ee
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java
@@ -0,0 +1,54 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
+import org.eigenbase.rel.AggregateCall;
+import org.eigenbase.rel.RelCollation;
+import org.eigenbase.rel.RelCollationImpl;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.rex.RexNode;
+
+public class TraitsUtil {
+
+ public static RelTraitSet getSelectTraitSet(RelOptCluster cluster, List<RexNode> exps,
+ RelNode child) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getSortTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ RelCollation collation) {
+ return traitSet.plus(collation);
+ }
+
+ public static RelTraitSet getFilterTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ RelNode child) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getLimitTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ RelNode child) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getAggregateTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ List<Integer> gbCols, List<AggregateCall> aggCalls, RelNode child) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getTableScanTraitSet(RelOptCluster cluster, RelTraitSet traitSet,
+ RelOptHiveTable table, RelDataType rowtype) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getJoinTraitSet(RelOptCluster cluster, RelTraitSet traitSet) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+
+ public static RelTraitSet getUnionTraitSet(RelOptCluster cluster, RelTraitSet traitSet) {
+ return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCost.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCost.java
new file mode 100644
index 0000000..34a37e4
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCost.java
@@ -0,0 +1,194 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.cost;
+
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptCostFactory;
+import org.eigenbase.relopt.RelOptUtil;
+
+// TODO: This should inherit from VolcanoCost and should just override isLE method.
+public class HiveCost implements RelOptCost {
+ // ~ Static fields/initializers ---------------------------------------------
+
+ public static final HiveCost INFINITY = new HiveCost(Double.POSITIVE_INFINITY,
+ Double.POSITIVE_INFINITY,
+ Double.POSITIVE_INFINITY) {
+ @Override
+ public String toString() {
+ return "{inf}";
+ }
+ };
+
+ public static final HiveCost HUGE = new HiveCost(Double.MAX_VALUE, Double.MAX_VALUE,
+ Double.MAX_VALUE) {
+ @Override
+ public String toString() {
+ return "{huge}";
+ }
+ };
+
+ public static final HiveCost ZERO = new HiveCost(0.0, 0.0, 0.0) {
+ @Override
+ public String toString() {
+ return "{0}";
+ }
+ };
+
+ public static final HiveCost TINY = new HiveCost(1.0, 1.0, 0.0) {
+ @Override
+ public String toString() {
+ return "{tiny}";
+ }
+ };
+
+ public static final RelOptCostFactory FACTORY = new Factory();
+
+ // ~ Instance fields --------------------------------------------------------
+
+ final double cpu;
+ final double io;
+ final double rowCount;
+
+ // ~ Constructors -----------------------------------------------------------
+
+ HiveCost(double rowCount, double cpu, double io) {
+ assert rowCount >= 0d;
+ assert cpu >= 0d;
+ assert io >= 0d;
+ this.rowCount = rowCount;
+ this.cpu = cpu;
+ this.io = io;
+ }
+
+ // ~ Methods ----------------------------------------------------------------
+
+ public double getCpu() {
+ return cpu;
+ }
+
+ public boolean isInfinite() {
+ return (this == INFINITY) || (this.rowCount == Double.POSITIVE_INFINITY)
+ || (this.cpu == Double.POSITIVE_INFINITY) || (this.io == Double.POSITIVE_INFINITY);
+ }
+
+ public double getIo() {
+ return io;
+ }
+
+ // TODO: If two costs are equal, can we do any better than comparing
+ // cardinality (maybe some other heuristic to break the tie)?
+ public boolean isLe(RelOptCost other) {
+ return this == other || this.rowCount <= other.getRows();
+ /*
+ * if (((this.dCpu + this.dIo) < (other.getCpu() + other.getIo())) ||
+ * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()) && this.dRows
+ * <= other.getRows())) { return true; } else { return false; }
+ */
+ }
+
+ public boolean isLt(RelOptCost other) {
+ return this.rowCount < other.getRows();
+ /*
+ * return isLe(other) && !equals(other);
+ */
+ }
+
+ public double getRows() {
+ return rowCount;
+ }
+
+ public boolean equals(RelOptCost other) {
+ return (this == other) || ((this.rowCount) == (other.getRows()));
+
+ /*
+ * //TODO: should we consider cardinality as well? return (this == other) ||
+ * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()));
+ */
+ }
+
+ public boolean isEqWithEpsilon(RelOptCost other) {
+ return (this == other) || (Math.abs((this.rowCount) - (other.getRows())) < RelOptUtil.EPSILON);
+ /*
+ * return (this == other) || (Math.abs((this.dCpu + this.dIo) -
+ * (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON);
+ */
+ }
+
+ public RelOptCost minus(RelOptCost other) {
+ if (this == INFINITY) {
+ return this;
+ }
+
+ return new HiveCost(this.rowCount - other.getRows(), this.cpu - other.getCpu(), this.io
+ - other.getIo());
+ }
+
+ public RelOptCost multiplyBy(double factor) {
+ if (this == INFINITY) {
+ return this;
+ }
+ return new HiveCost(rowCount * factor, cpu * factor, io * factor);
+ }
+
+ public double divideBy(RelOptCost cost) {
+ // Compute the geometric average of the ratios of all of the factors
+ // which are non-zero and finite.
+ double d = 1;
+ double n = 0;
+ if ((this.rowCount != 0) && !Double.isInfinite(this.rowCount) && (cost.getRows() != 0)
+ && !Double.isInfinite(cost.getRows())) {
+ d *= this.rowCount / cost.getRows();
+ ++n;
+ }
+ if ((this.cpu != 0) && !Double.isInfinite(this.cpu) && (cost.getCpu() != 0)
+ && !Double.isInfinite(cost.getCpu())) {
+ d *= this.cpu / cost.getCpu();
+ ++n;
+ }
+ if ((this.io != 0) && !Double.isInfinite(this.io) && (cost.getIo() != 0)
+ && !Double.isInfinite(cost.getIo())) {
+ d *= this.io / cost.getIo();
+ ++n;
+ }
+ if (n == 0) {
+ return 1.0;
+ }
+ return Math.pow(d, 1 / n);
+ }
+
+ public RelOptCost plus(RelOptCost other) {
+ if ((this == INFINITY) || (other.isInfinite())) {
+ return INFINITY;
+ }
+ return new HiveCost(this.rowCount + other.getRows(), this.cpu + other.getCpu(), this.io
+ + other.getIo());
+ }
+
+ @Override
+ public String toString() {
+ return "{" + rowCount + " rows, " + cpu + " cpu, " + io + " io}";
+ }
+
+ private static class Factory implements RelOptCostFactory {
+ private Factory() {
+ }
+
+ public RelOptCost makeCost(double rowCount, double cpu, double io) {
+ return new HiveCost(rowCount, cpu, io);
+ }
+
+ public RelOptCost makeHugeCost() {
+ return HUGE;
+ }
+
+ public HiveCost makeInfiniteCost() {
+ return INFINITY;
+ }
+
+ public HiveCost makeTinyCost() {
+ return TINY;
+ }
+
+ public HiveCost makeZeroCost() {
+ return ZERO;
+ }
+ }
+}
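
Note that isLe/isLt/equals above compare row count only; cpu and io are carried along but ignored when ordering plans. divideBy, in contrast, uses all non-zero finite factors and takes the geometric mean of their ratios. A worked example (the numbers are made up):

    public class HiveCostDivideSketch {
      public static void main(String[] args) {
        // {100 rows, 10 cpu, 0 io} divided by {25 rows, 40 cpu, 0 io}
        double rowRatio = 100.0 / 25.0;                  // 4.0
        double cpuRatio = 10.0 / 40.0;                   // 0.25
        // io is zero in both costs, so it contributes no ratio.
        double result = Math.pow(rowRatio * cpuRatio, 1.0 / 2);
        System.out.println(result);                      // 1.0 -> overall the costs come out equivalent
      }
    }
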
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCostUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCostUtil.java
new file mode 100644
index 0000000..926bca5
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveCostUtil.java
@@ -0,0 +1,23 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.cost;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.eigenbase.relopt.RelOptCost;
+
+public class HiveCostUtil {
+ private static final double cpuCostInNanoSec = 1.0;
+ private static final double netCostInNanoSec = 150 * cpuCostInNanoSec;
+ private static final double localFSWriteCostInNanoSec = 4 * netCostInNanoSec;
+ private static final double localFSReadCostInNanoSec = 4 * netCostInNanoSec;
+ private static final double hDFSWriteCostInNanoSec = 10 * localFSWriteCostInNanoSec;
+ private static final double hDFSReadCostInNanoSec = 1.5 * localFSReadCostInNanoSec;
+
+ public static RelOptCost computCardinalityBasedCost(HiveRel hr) {
+ return new HiveCost(hr.getRows(), 0, 0);
+ }
+
+ public static HiveCost computeCost(HiveTableScanRel t) {
+ double cardinality = t.getRows();
+ return new HiveCost(cardinality, 0, hDFSWriteCostInNanoSec * cardinality * 0);
+ }
+}
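
The constants above encode relative unit costs rather than measured times: network transfer is taken as 150x a CPU operation, local FS read/write as 4x network, HDFS write as 10x a local write, and HDFS read as 1.5x a local read. A quick sketch of the implied multipliers relative to CPU (ratios only, not real nanosecond measurements):

    public class RelativeCostSketch {
      public static void main(String[] args) {
        double cpu = 1.0;
        double net = 150 * cpu;              // 150
        double localWrite = 4 * net;         // 600
        double localRead = 4 * net;          // 600
        double hdfsWrite = 10 * localWrite;  // 6000
        double hdfsRead = 1.5 * localRead;   // 900
        System.out.println(net + " " + localWrite + " " + hdfsWrite + " " + hdfsRead);
      }
    }
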
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java
new file mode 100644
index 0000000..15596bc
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java
@@ -0,0 +1,31 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.cost;
+
+import org.eigenbase.rel.RelCollationTraitDef;
+import org.eigenbase.relopt.ConventionTraitDef;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.volcano.VolcanoPlanner;
+
+/**
+ * Refinement of {@link org.eigenbase.relopt.volcano.VolcanoPlanner} for Hive.
+ *
+ *
+ * It uses {@link org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost} as
+ * its cost model.
+ */
+public class HiveVolcanoPlanner extends VolcanoPlanner {
+ private static final boolean ENABLE_COLLATION_TRAIT = true;
+
+ /** Creates a HiveVolcanoPlanner. */
+ public HiveVolcanoPlanner() {
+ super(HiveCost.FACTORY, null);
+ }
+
+ public static RelOptPlanner createPlanner() {
+ final VolcanoPlanner planner = new HiveVolcanoPlanner();
+ planner.addRelTraitDef(ConventionTraitDef.INSTANCE);
+ if (ENABLE_COLLATION_TRAIT) {
+ planner.addRelTraitDef(RelCollationTraitDef.INSTANCE);
+ }
+ return planner;
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveAggregateRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveAggregateRel.java
new file mode 100644
index 0000000..1588cdf
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveAggregateRel.java
@@ -0,0 +1,52 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.BitSet;
+import java.util.List;
+
+import net.hydromatic.optiq.util.BitSets;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.eigenbase.rel.AggregateCall;
+import org.eigenbase.rel.AggregateRelBase;
+import org.eigenbase.rel.InvalidRelException;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.metadata.RelMetadataQuery;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelTraitSet;
+
+public class HiveAggregateRel extends AggregateRelBase implements HiveRel {
+
+ public HiveAggregateRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ BitSet groupSet, List<AggregateCall> aggCalls) throws InvalidRelException {
+ super(cluster, TraitsUtil.getAggregateTraitSet(cluster, traitSet, BitSets.toList(groupSet),
+ aggCalls, child), child, groupSet, aggCalls);
+ }
+
+ public AggregateRelBase copy(RelTraitSet traitSet, RelNode input, BitSet groupSet,
+ List<AggregateCall> aggCalls) {
+ try {
+ return new HiveAggregateRel(getCluster(), traitSet, input, groupSet, aggCalls);
+ } catch (InvalidRelException e) {
+ // Semantic error not possible. Must be a bug. Convert to
+ // internal error.
+ throw new AssertionError(e);
+ }
+ }
+
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public double getRows() {
+ return RelMetadataQuery.getDistinctRowCount(this, groupSet, getCluster().getRexBuilder()
+ .makeLiteral(true));
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveFilterRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveFilterRel.java
new file mode 100644
index 0000000..5c0f386
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveFilterRel.java
@@ -0,0 +1,51 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.rel.RelFactories.FilterFactory;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.rex.RexNode;
+
+public class HiveFilterRel extends FilterRelBase implements HiveRel {
+
+ public static final FilterFactory DEFAULT_FILTER_FACTORY = new HiveFilterFactoryImpl();
+
+ public HiveFilterRel(RelOptCluster cluster, RelTraitSet traits, RelNode child, RexNode condition) {
+ super(cluster, TraitsUtil.getFilterTraitSet(cluster, traits, child), child, condition);
+ }
+
+ @Override
+ public FilterRelBase copy(RelTraitSet traitSet, RelNode input, RexNode condition) {
+ assert traitSet.containsIfApplicable(HiveRel.CONVENTION);
+ return new HiveFilterRel(getCluster(), traitSet, input, getCondition());
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ /**
+ * Implementation of {@link FilterFactory} that returns
+ * {@link org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel}
+ * .
+ */
+ private static class HiveFilterFactoryImpl implements FilterFactory {
+ @Override
+ public RelNode createFilter(RelNode child, RexNode condition) {
+ RelOptCluster cluster = child.getCluster();
+ HiveFilterRel filter = new HiveFilterRel(cluster, TraitsUtil.getFilterTraitSet(cluster, null,
+ child), child, condition);
+ return filter;
+ }
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java
new file mode 100644
index 0000000..de4ec71
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java
@@ -0,0 +1,136 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.eigenbase.rel.InvalidRelException;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.JoinRelType;
+import org.eigenbase.rel.RelFactories.JoinFactory;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.metadata.RelMetadataQuery;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexNode;
+
+public class HiveJoinRel extends JoinRelBase implements HiveRel {
+ // NOTE: COMMON_JOIN & SMB_JOIN are Sort Merge Joins (in the case of COMMON_JOIN
+ // each parallel computation handles multiple splits, whereas in the case of SMB
+ // each parallel computation handles one bucket). MAP_JOIN and BUCKET_JOIN are
+ // hash joins, where MAP_JOIN keeps the whole data set of the non-streaming tables
+ // in memory whereas BUCKET_JOIN keeps only the b
+ public enum JoinAlgorithm {
+ NONE, COMMON_JOIN, MAP_JOIN, BUCKET_JOIN, SMB_JOIN
+ }
+
+ public enum MapJoinStreamingRelation {
+ NONE, LEFT_RELATION, RIGHT_RELATION
+ }
+
+ public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl();
+
+ private final boolean m_leftSemiJoin;
+ private final JoinAlgorithm m_joinAlgorithm;
+ private MapJoinStreamingRelation m_mapJoinStreamingSide = MapJoinStreamingRelation.NONE;
+
+ public static HiveJoinRel getJoin(RelOptCluster cluster, RelNode left, RelNode right,
+ RexNode condition, JoinRelType joinType, boolean leftSemiJoin) {
+ try {
+ Set<String> variablesStopped = Collections.emptySet();
+ return new HiveJoinRel(cluster, null, left, right, condition, joinType, variablesStopped,
+ JoinAlgorithm.NONE, null, leftSemiJoin);
+ } catch (InvalidRelException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ protected HiveJoinRel(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right,
+ RexNode condition, JoinRelType joinType, Set<String> variablesStopped,
+ JoinAlgorithm joinAlgo, MapJoinStreamingRelation streamingSideForMapJoin, boolean leftSemiJoin)
+ throws InvalidRelException {
+ super(cluster, TraitsUtil.getJoinTraitSet(cluster, traits), left, right, condition, joinType,
+ variablesStopped);
+ this.m_joinAlgorithm = joinAlgo;
+ m_leftSemiJoin = leftSemiJoin;
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public final HiveJoinRel copy(RelTraitSet traitSet, RexNode conditionExpr, RelNode left,
+ RelNode right, JoinRelType joinType, boolean semiJoinDone) {
+ try {
+ Set<String> variablesStopped = Collections.emptySet();
+ return new HiveJoinRel(getCluster(), traitSet, left, right, conditionExpr, joinType,
+ variablesStopped, JoinAlgorithm.NONE, null, m_leftSemiJoin);
+ } catch (InvalidRelException e) {
+ // Semantic error not possible. Must be a bug. Convert to
+ // internal error.
+ throw new AssertionError(e);
+ }
+ }
+
+ public JoinAlgorithm getJoinAlgorithm() {
+ return m_joinAlgorithm;
+ }
+
+ public boolean isLeftSemiJoin() {
+ return m_leftSemiJoin;
+ }
+
+ /**
+ * Model cost of join as size of Inputs.
+ */
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ double leftRCount = RelMetadataQuery.getRowCount(getLeft());
+ double rightRCount = RelMetadataQuery.getRowCount(getRight());
+ return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0);
+ }
+
+ /**
+ * @return returns rowtype representing only the left join input
+ */
+ public RelDataType deriveRowType() {
+ if (m_leftSemiJoin) {
+ return deriveJoinRowType(left.getRowType(), null, JoinRelType.INNER,
+ getCluster().getTypeFactory(), null,
+ Collections.<RelDataTypeField> emptyList());
+ }
+ return super.deriveRowType();
+ }
+
+ private static class HiveJoinFactoryImpl implements JoinFactory {
+ /**
+ * Creates a join.
+ *
+ * @param left
+ * Left input
+ * @param right
+ * Right input
+ * @param condition
+ * Join condition
+ * @param joinType
+ * Join type
+ * @param variablesStopped
+ * Set of names of variables which are set by the LHS and used by
+ * the RHS and are not available to nodes above this JoinRel in the
+ * tree
+ * @param semiJoinDone
+ * Whether this join has been translated to a semi-join
+ */
+ @Override
+ public RelNode createJoin(RelNode left, RelNode right, RexNode condition, JoinRelType joinType,
+ Set<String> variablesStopped, boolean semiJoinDone) {
+ return getJoin(left.getCluster(), left, right, condition, joinType, false);
+ }
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveLimitRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveLimitRel.java
new file mode 100644
index 0000000..bf37a7b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveLimitRel.java
@@ -0,0 +1,40 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.SingleRel;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.rex.RexNode;
+
+public class HiveLimitRel extends SingleRel implements HiveRel {
+ private final RexNode offset;
+ private final RexNode fetch;
+
+ HiveLimitRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, RexNode offset,
+ RexNode fetch) {
+ super(cluster, TraitsUtil.getLimitTraitSet(cluster, traitSet, child), child);
+ this.offset = offset;
+ this.fetch = fetch;
+ assert getConvention() instanceof HiveRel;
+ assert getConvention() == child.getConvention();
+ }
+
+ @Override
+ public HiveLimitRel copy(RelTraitSet traitSet, List<RelNode> newInputs) {
+ return new HiveLimitRel(getCluster(), traitSet, sole(newInputs), offset, fetch);
+ }
+
+ public void implement(Implementor implementor) {
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java
new file mode 100644
index 0000000..84b527e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java
@@ -0,0 +1,154 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.eigenbase.rel.ProjectRelBase;
+import org.eigenbase.rel.RelCollation;
+import org.eigenbase.rel.RelFactories.ProjectFactory;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
+import org.eigenbase.util.mapping.Mapping;
+import org.eigenbase.util.mapping.MappingType;
+
+public class HiveProjectRel extends ProjectRelBase implements HiveRel {
+
+ public static final ProjectFactory DEFAULT_PROJECT_FACTORY = new HiveProjectFactoryImpl();
+
+ /**
+ * Creates a HiveProjectRel.
+ *
+ * @param cluster
+ * Cluster this relational expression belongs to
+ * @param child
+ * input relational expression
+ * @param exps
+ * List of expressions for the input columns
+ * @param rowType
+ * output row type
+ * @param flags
+ * values as in {@link ProjectRelBase.Flags}
+ */
+ public HiveProjectRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ List<RexNode> exps, RelDataType rowType, int flags) {
+ super(cluster, traitSet, child, exps, rowType, flags);
+ }
+
+ /**
+ * Creates a HiveProjectRel with no sort keys.
+ *
+ * @param child
+ * input relational expression
+ * @param exps
+ * set of expressions for the input columns
+ * @param fieldNames
+ * aliases of the expressions
+ */
+ public static HiveProjectRel create(RelNode child, List<RexNode> exps, List<String> fieldNames) {
+ RelOptCluster cluster = child.getCluster();
+ RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), exps, fieldNames);
+ return create(cluster, child, exps, rowType, Collections.<RelCollation> emptyList());
+ }
+
+ /**
+ * Creates a HiveProjectRel.
+ */
+ public static HiveProjectRel create(RelOptCluster cluster, RelNode child, List<RexNode> exps,
+ RelDataType rowType, final List<RelCollation> collationList) {
+ RelTraitSet traitSet = TraitsUtil.getSelectTraitSet(cluster, exps, child);
+ return new HiveProjectRel(cluster, traitSet, child, exps, rowType, Flags.BOXED);
+ }
+
+ /**
+ * Creates a relational expression which projects the output fields of a
+ * relational expression according to a partial mapping.
+ *
+ *
+ * A partial mapping is weaker than a permutation: every target has one
+ * source, but a source may have 0, 1 or more than one targets. Usually the
+ * result will have fewer fields than the source, unless some source fields
+ * are projected multiple times.
+ *
+ *
+ * This method could optimize the result as {@link #permute} does, but does
+ * not at present.
+ *
+ * @param rel
+ * Relational expression
+ * @param mapping
+ * Mapping from source fields to target fields. The mapping type must
+ * obey the constraints {@link MappingType#isMandatorySource()} and
+ * {@link MappingType#isSingleSource()}, as does
+ * {@link MappingType#INVERSE_FUNCTION}.
+ * @param fieldNames
+ * Field names; if null, or if a particular entry is null, the name
+ * of the permuted field is used
+ * @return relational expression which projects a subset of the input fields
+ */
+ public static RelNode projectMapping(RelNode rel, Mapping mapping, List<String> fieldNames) {
+ assert mapping.getMappingType().isSingleSource();
+ assert mapping.getMappingType().isMandatorySource();
+
+ if (mapping.isIdentity()) {
+ return rel;
+ }
+
+ final List<String> outputNameList = new ArrayList<String>();
+ final List<RexNode> outputProjList = new ArrayList<RexNode>();
+ final List<RelDataTypeField> fields = rel.getRowType().getFieldList();
+ final RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
+
+ for (int i = 0; i < mapping.getTargetCount(); i++) {
+ int source = mapping.getSource(i);
+ final RelDataTypeField sourceField = fields.get(source);
+ outputNameList
+ .add(((fieldNames == null) || (fieldNames.size() <= i) || (fieldNames.get(i) == null)) ? sourceField
+ .getName() : fieldNames.get(i));
+ outputProjList.add(rexBuilder.makeInputRef(rel, source));
+ }
+
+ return create(rel, outputProjList, outputNameList);
+ }
+
+ public ProjectRelBase copy(RelTraitSet traitSet, RelNode input, List<RexNode> exps,
+ RelDataType rowType) {
+ assert traitSet.containsIfApplicable(HiveRel.CONVENTION);
+ return new HiveProjectRel(getCluster(), traitSet, input, exps, rowType, getFlags());
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ public void implement(Implementor implementor) {
+ }
+
+ /**
+ * Implementation of {@link ProjectFactory} that returns
+ * {@link org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel}
+ * .
+ */
+ private static class HiveProjectFactoryImpl implements ProjectFactory {
+ @Override
+ public RelNode createProject(RelNode input, List<? extends RexNode> exps, List<String> fieldNames) {
+ RelNode project = HiveProjectRel.create(input, exps, fieldNames);
+
+ // Make sure extra traits are carried over from the original rel
+ project = RelOptRule.convert(project, input.getTraitSet());
+ return project;
+ }
+ }
+}
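A minimal usage sketch (not part of the patch; `input` and the column alias are hypothetical) showing how a consumer would build a projection through the new operator, either via the static create method or via DEFAULT_PROJECT_FACTORY:

    // Project the first column of an existing relational expression.
    RexBuilder rexBuilder = input.getCluster().getRexBuilder();
    List<RexNode> exprs = Collections.<RexNode> singletonList(rexBuilder.makeInputRef(input, 0));
    List<String> names = Collections.singletonList("c0");
    RelNode viaCreate = HiveProjectRel.create(input, exprs, names);
    RelNode viaFactory = HiveProjectRel.DEFAULT_PROJECT_FACTORY.createProject(input, exprs, names);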
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveRel.java
new file mode 100644
index 0000000..6f3f1d8
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveRel.java
@@ -0,0 +1,19 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.Convention;
+
+public interface HiveRel extends RelNode {
+ void implement(Implementor implementor);
+
+ /** Calling convention for relational operations that occur in Hive. */
+ final Convention CONVENTION = new Convention.Impl("HIVE", HiveRel.class);
+
+ class Implementor {
+
+ public void visitChild(int ordinal, RelNode input) {
+ assert ordinal == 0;
+ ((HiveRel) input).implement(this);
+ }
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java
new file mode 100644
index 0000000..1c42a29
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java
@@ -0,0 +1,36 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.eigenbase.rel.RelCollation;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.SortRel;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.rex.RexNode;
+
+public class HiveSortRel extends SortRel implements HiveRel {
+
+ public HiveSortRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ RelCollation collation, RexNode offset, RexNode fetch) {
+ super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation,
+ offset, fetch);
+
+ assert getConvention() == child.getConvention();
+ }
+
+ @Override
+ public HiveSortRel copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation,
+ RexNode offset, RexNode fetch) {
+ // TODO: can we blindly copy sort trait? What if inputs changed and we
+ // are now sorting by different cols
+ RelCollation canonizedCollation = traitSet.canonize(newCollation);
+ return new HiveSortRel(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch);
+ }
+
+ public RexNode getFetchExpr() {
+ return fetch;
+ }
+
+ public void implement(Implementor implementor) {
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java
new file mode 100644
index 0000000..6e32062
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java
@@ -0,0 +1,75 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveCost;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.TableAccessRelBase;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptCost;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+
+
+/**
+ * Relational expression representing a scan of a Hive table.
+ *
+ */
+public class HiveTableScanRel extends TableAccessRelBase implements HiveRel {
+
+ /**
+ * Creates a HiveTableScan.
+ *
+ * @param cluster
+ * Cluster
+ * @param traitSet
+ * Traits
+ * @param table
+ * Hive table
+ * @param rowtype
+ * Row type of the scan
+ */
+ public HiveTableScanRel(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table,
+ RelDataType rowtype) {
+ super(cluster, TraitsUtil.getTableScanTraitSet(cluster, traitSet, table, rowtype), table);
+ assert getConvention() == HiveRel.CONVENTION;
+ }
+
+ @Override
+ public RelNode copy(RelTraitSet traitSet, List<RelNode> inputs) {
+ assert inputs.isEmpty();
+ return this;
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public void register(RelOptPlanner planner) {
+
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+
+ }
+
+ @Override
+ public double getRows() {
+ return ((RelOptHiveTable) table).getRowCount();
+ }
+
+ public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
+ return ((RelOptHiveTable) table).getColStat(projIndxLst);
+ }
+}
\ No newline at end of file
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java
new file mode 100644
index 0000000..b81f3c8
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java
@@ -0,0 +1,25 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel.Implementor;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.SetOpRel;
+import org.eigenbase.rel.UnionRelBase;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelTraitSet;
+
+public class HiveUnionRel extends UnionRelBase {
+
+ public HiveUnionRel(RelOptCluster cluster, RelTraitSet traits, List<RelNode> inputs) {
+ super(cluster, traits, inputs, true);
+ }
+
+ @Override
+ public SetOpRel copy(RelTraitSet traitSet, List<RelNode> inputs, boolean all) {
+ return new HiveUnionRel(this.getCluster(), traitSet, inputs);
+ }
+
+ public void implement(Implementor implementor) {
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveMergeProjectRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveMergeProjectRule.java
new file mode 100644
index 0000000..a34b532
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveMergeProjectRule.java
@@ -0,0 +1,12 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
+import org.eigenbase.rel.rules.MergeProjectRule;
+
+public class HiveMergeProjectRule extends MergeProjectRule {
+ public static final HiveMergeProjectRule INSTANCE = new HiveMergeProjectRule();
+
+ public HiveMergeProjectRule() {
+ super(true, HiveProjectRel.DEFAULT_PROJECT_FACTORY);
+ }
+}
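A hedged sketch (not in the patch) of how the rule could be exercised with eigenbase's heuristic planner; `root` is an assumed, already-built plan:

    HepProgramBuilder programBuilder = new HepProgramBuilder();
    programBuilder.addRuleInstance(HiveMergeProjectRule.INSTANCE);
    HepPlanner hepPlanner = new HepPlanner(programBuilder.build());
    hepPlanner.setRoot(root);
    RelNode merged = hepPlanner.findBestExp(); // adjacent HiveProjectRels collapsed into one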
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java
new file mode 100644
index 0000000..6f06c6a
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelOptRuleCall;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.util.Pair;
+
+public class HivePartitionPrunerRule extends RelOptRule {
+
+ HiveConf conf;
+
+ public HivePartitionPrunerRule(HiveConf conf) {
+ super(operand(HiveFilterRel.class, operand(HiveTableScanRel.class, none())));
+ this.conf = conf;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ HiveFilterRel filter = call.rel(0);
+ HiveTableScanRel tScan = call.rel(1);
+ perform(call, filter, tScan);
+ }
+
+ protected void perform(RelOptRuleCall call, FilterRelBase filter,
+ HiveTableScanRel tScan) {
+
+ RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
+ RexNode predicate = filter.getCondition();
+
+ Pair<RexNode, RexNode> predicates = PartitionPruner
+ .extractPartitionPredicates(filter.getCluster(), hiveTable, predicate);
+ RexNode partColExpr = predicates.left;
+ RexNode remainingExpr = predicates.right;
+ remainingExpr = remainingExpr == null ? filter.getCluster().getRexBuilder()
+ .makeLiteral(true) : remainingExpr;
+ hiveTable.computePartitionList(conf, partColExpr);
+ }
+}
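The rule only fires on a HiveFilterRel sitting directly over a HiveTableScanRel; a hedged sketch of registering it, reusing the HepProgramBuilder from the sketch above and an assumed HiveConf instance `conf`:

    programBuilder.addRuleInstance(new HivePartitionPrunerRule(conf));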
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
new file mode 100644
index 0000000..f8d1ac1
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
@@ -0,0 +1,276 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.JoinRelType;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelOptRuleCall;
+import org.eigenbase.relopt.RelOptRuleOperand;
+import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.relopt.RelOptUtil.InputFinder;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
+import org.eigenbase.sql.SqlKind;
+import org.eigenbase.util.Holder;
+
+import com.google.common.collect.ImmutableList;
+
+public abstract class HivePushFilterPastJoinRule extends RelOptRule {
+
+ public static final HivePushFilterPastJoinRule FILTER_ON_JOIN = new HivePushFilterPastJoinRule(
+ operand(FilterRelBase.class, operand(HiveJoinRel.class, any())),
+ "HivePushFilterPastJoinRule:filter", true) {
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ HiveFilterRel filter = call.rel(0);
+ HiveJoinRel join = call.rel(1);
+ perform(call, filter, join);
+ }
+ };
+
+ public static final HivePushFilterPastJoinRule JOIN = new HivePushFilterPastJoinRule(
+ operand(HiveJoinRel.class, any()), "HivePushFilterPastJoinRule:no-filter", false) {
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ HiveJoinRel join = call.rel(0);
+ perform(call, null, join);
+ }
+ };
+
+ /** Whether to try to strengthen join-type. */
+ private final boolean smart;
+
+ // ~ Constructors -----------------------------------------------------------
+
+ /**
+ * Creates a PushFilterPastJoinRule with an explicit root operand.
+ */
+ private HivePushFilterPastJoinRule(RelOptRuleOperand operand, String id, boolean smart) {
+ super(operand, "PushFilterRule: " + id);
+ this.smart = smart;
+ }
+
+ // ~ Methods ----------------------------------------------------------------
+
+ protected void perform(RelOptRuleCall call, FilterRelBase filter,
+ JoinRelBase join) {
+ final List<RexNode> joinFilters = RelOptUtil.conjunctions(join
+ .getCondition());
+
+ /*
+ * todo: hb 6/26/14 for left SemiJoin we cannot push predicates yet. The
+ * assertion that num(JoinRel columns) = num(leftSrc) + num(rightSrc)
+ * doesn't hold. So RelOptUtil.classifyFilters fails.
+ */
+ if (((HiveJoinRel) join).isLeftSemiJoin()) {
+ return;
+ }
+
+ if (filter == null) {
+ // There is only the joinRel
+ // make sure it does not match a cartesian product joinRel
+ // (with "true" condition) otherwise this rule will be applied
+ // again on the new cartesian product joinRel.
+ boolean onlyTrueFilter = true;
+ for (RexNode joinFilter : joinFilters) {
+ if (!joinFilter.isAlwaysTrue()) {
+ onlyTrueFilter = false;
+ break;
+ }
+ }
+
+ if (onlyTrueFilter) {
+ return;
+ }
+ }
+
+ final List<RexNode> aboveFilters = filter != null ? RelOptUtil
+ .conjunctions(filter.getCondition()) : ImmutableList.<RexNode> of();
+
+ List<RexNode> leftFilters = new ArrayList<RexNode>();
+ List<RexNode> rightFilters = new ArrayList<RexNode>();
+ int origJoinFiltersSz = joinFilters.size();
+
+ // TODO - add logic to derive additional filters. E.g., from
+ // (t1.a = 1 AND t2.a = 2) OR (t1.b = 3 AND t2.b = 4), you can
+ // derive table filters:
+ // (t1.a = 1 OR t1.b = 3)
+ // (t2.a = 2 OR t2.b = 4)
+
+ // Try to push down above filters. These are typically where clause
+ // filters. They can be pushed down if they are not on the NULL
+ // generating side.
+ boolean filterPushed = false;
+ final Holder<JoinRelType> joinTypeHolder = Holder.of(join.getJoinType());
+ if (RelOptUtil.classifyFilters(join, aboveFilters,
+ join.getJoinType(), true, !join.getJoinType().generatesNullsOnLeft(), !join.getJoinType()
+ .generatesNullsOnRight(), joinFilters, leftFilters, rightFilters, joinTypeHolder, smart)) {
+ filterPushed = true;
+ }
+
+ /*
+ * Any predicates pushed down to joinFilters that aren't equality
+ * conditions: put them back as aboveFilters because Hive doesn't support
+ * non-equi join conditions.
+ */
+ ListIterator<RexNode> filterIter = joinFilters.listIterator();
+ while (filterIter.hasNext()) {
+ RexNode exp = filterIter.next();
+ if (exp instanceof RexCall) {
+ RexCall c = (RexCall) exp;
+ if (c.getOperator().getKind() == SqlKind.EQUALS) {
+ boolean validHiveJoinFilter = true;
+ for (RexNode rn : c.getOperands()) {
+ // NOTE: Hive disallows an equality operand that refers to columns from
+ // both the left & right side of the join. Example: Hive disallows
+ // (r1.x=r2.x)=(r1.y=r2.y) as a join condition.
+ if (filterRefersToBothSidesOfJoin(rn, join)) {
+ validHiveJoinFilter = false;
+ break;
+ }
+ }
+ if (validHiveJoinFilter)
+ continue;
+ }
+ }
+ aboveFilters.add(exp);
+ filterIter.remove();
+ }
+
+ /*
+ * if all pushed filters were put back then set filterPushed to false
+ */
+ if (leftFilters.size() == 0 && rightFilters.size() == 0
+ && joinFilters.size() == origJoinFiltersSz) {
+ filterPushed = false;
+ }
+
+ // Try to push down filters in ON clause. A ON clause filter can only be
+ // pushed down if it does not affect the non-matching set, i.e. it is
+ // not on the side which is preserved.
+ if (RelOptUtil.classifyFilters(join, joinFilters, null, false, !join
+ .getJoinType().generatesNullsOnRight(), !join.getJoinType()
+ .generatesNullsOnLeft(), joinFilters, leftFilters, rightFilters, joinTypeHolder, false)) {
+ filterPushed = true;
+ }
+
+ if (!filterPushed) {
+ return;
+ }
+
+ /*
+ * Remove always true conditions that got pushed down.
+ */
+ removeAlwaysTruePredicates(leftFilters);
+ removeAlwaysTruePredicates(rightFilters);
+ removeAlwaysTruePredicates(joinFilters);
+
+ // create FilterRels on top of the children if any filters were
+ // pushed to them
+ RexBuilder rexBuilder = join.getCluster().getRexBuilder();
+ RelNode leftRel = createFilterOnRel(rexBuilder, join.getLeft(), leftFilters);
+ RelNode rightRel = createFilterOnRel(rexBuilder, join.getRight(),
+ rightFilters);
+
+ // create the new join node referencing the new children and
+ // containing its new join filters (if there are any)
+ RexNode joinFilter;
+
+ if (joinFilters.size() == 0) {
+ // if nothing actually got pushed and there is nothing leftover,
+ // then this rule is a no-op
+ if (leftFilters.isEmpty()
+ && rightFilters.isEmpty()
+ && joinTypeHolder.get() == join.getJoinType()) {
+ return;
+ }
+ joinFilter = rexBuilder.makeLiteral(true);
+ } else {
+ joinFilter = RexUtil.composeConjunction(rexBuilder, joinFilters, true);
+ }
+ RelNode newJoinRel = HiveJoinRel.getJoin(join.getCluster(), leftRel,
+ rightRel, joinFilter, join.getJoinType(), false);
+
+ // create a FilterRel on top of the join if needed
+ RelNode newRel = createFilterOnRel(rexBuilder, newJoinRel, aboveFilters);
+
+ call.transformTo(newRel);
+ }
+
+ /**
+ * If the filter list passed in is non-empty, creates a FilterRel on top of
+ * the existing RelNode; otherwise, just returns the RelNode
+ *
+ * @param rexBuilder
+ * rex builder
+ * @param rel
+ * the RelNode that the filter will be put on top of
+ * @param filters
+ * list of filters
+ * @return new RelNode or existing one if no filters
+ */
+ private RelNode createFilterOnRel(RexBuilder rexBuilder, RelNode rel,
+ List<RexNode> filters) {
+ RexNode andFilters = RexUtil.composeConjunction(rexBuilder, filters, false);
+ if (andFilters.isAlwaysTrue()) {
+ return rel;
+ }
+ return new HiveFilterRel(rel.getCluster(), rel.getCluster().traitSetOf(
+ HiveRel.CONVENTION), rel, andFilters);
+ }
+
+ private void removeAlwaysTruePredicates(List<RexNode> predicates) {
+
+ ListIterator<RexNode> iter = predicates.listIterator();
+ while (iter.hasNext()) {
+ RexNode exp = iter.next();
+ if (isAlwaysTrue(exp)) {
+ iter.remove();
+ }
+ }
+ }
+
+ private boolean isAlwaysTrue(RexNode predicate) {
+ if (predicate instanceof RexCall) {
+ RexCall c = (RexCall) predicate;
+ if (c.getOperator().getKind() == SqlKind.EQUALS) {
+ return isAlwaysTrue(c.getOperands().get(0))
+ && isAlwaysTrue(c.getOperands().get(1));
+ }
+ }
+ return predicate.isAlwaysTrue();
+ }
+
+ private boolean filterRefersToBothSidesOfJoin(RexNode filter, JoinRelBase j) {
+ boolean refersToBothSides = false;
+
+ int joinNoOfProjects = j.getRowType().getFieldCount();
+ BitSet filterProjs = new BitSet(joinNoOfProjects);
+ BitSet allLeftProjs = new BitSet(joinNoOfProjects);
+ BitSet allRightProjs = new BitSet(joinNoOfProjects);
+ allLeftProjs.set(0, j.getInput(0).getRowType().getFieldCount(), true);
+ allRightProjs.set(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects, true);
+
+ InputFinder inputFinder = new InputFinder(filterProjs);
+ filter.accept(inputFinder);
+
+ if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs))
+ refersToBothSides = true;
+
+ return refersToBothSides;
+ }
+}
+
+// End PushFilterPastJoinRule.java
+
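A hedged illustration (not from the patch) of what the two rule instances do. For a query shaped like

    SELECT ... FROM t1 LEFT OUTER JOIN t2 ON t1.k = t2.k WHERE t1.x = 1 AND t2.y = 2

the conjunct t1.x = 1 can be pushed below the join to the t1 input, while t2.y = 2 is kept above the join (unless the join type is first strengthened to inner, which the `smart` flag attempts) because t2 is the null-generating side; non-equi conjuncts that land in the join condition are moved back above it, as the code notes. Registering both variants, with the planner setup as in the earlier sketch:

    programBuilder.addRuleInstance(HivePushFilterPastJoinRule.FILTER_ON_JOIN);
    programBuilder.addRuleInstance(HivePushFilterPastJoinRule.JOIN);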
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java
new file mode 100644
index 0000000..c28f974
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java
@@ -0,0 +1,941 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+import net.hydromatic.linq4j.Ord;
+import net.hydromatic.optiq.util.BitSets;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveUnionRel;
+import org.eigenbase.rel.AggregateCall;
+import org.eigenbase.rel.AggregateRel;
+import org.eigenbase.rel.CalcRel;
+import org.eigenbase.rel.InvalidRelException;
+import org.eigenbase.rel.JoinRel;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.RelCollation;
+import org.eigenbase.rel.RelFieldCollation;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.SetOpRel;
+import org.eigenbase.rel.SortRel;
+import org.eigenbase.rel.TableFunctionRel;
+import org.eigenbase.rel.TableModificationRel;
+import org.eigenbase.rel.ValuesRel;
+import org.eigenbase.rel.rules.RemoveTrivialProjectRule;
+import org.eigenbase.rel.rules.SemiJoinRel;
+import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.reltype.RelDataTypeImpl;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.rex.RexLiteral;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexPermuteInputsShuttle;
+import org.eigenbase.rex.RexUtil;
+import org.eigenbase.rex.RexVisitor;
+import org.eigenbase.sql.validate.SqlValidator;
+import org.eigenbase.util.Bug;
+import org.eigenbase.util.Pair;
+import org.eigenbase.util.ReflectUtil;
+import org.eigenbase.util.ReflectiveVisitor;
+import org.eigenbase.util.Util;
+import org.eigenbase.util.mapping.IntPair;
+import org.eigenbase.util.mapping.Mapping;
+import org.eigenbase.util.mapping.MappingType;
+import org.eigenbase.util.mapping.Mappings;
+
+/**
+ * Transformer that walks over a tree of relational expressions, replacing each
+ * {@link RelNode} with a 'slimmed down' relational expression that projects
+ * only the columns required by its consumer.
+ *
+ *
+ * Uses multi-methods to fire the right rule for each type of relational
+ * expression. This allows the transformer to be extended without having to add
+ * a new method to RelNode, and without requiring a collection of rule classes
+ * scattered to the four winds.
+ *
+ *
+ * REVIEW: jhyde, 2009/7/28: Is sql2rel the correct package for this class?
+ * Trimming fields is not an essential part of SQL-to-Rel translation, and
+ * arguably belongs in the optimization phase. But this transformer does not
+ * obey the usual pattern for planner rules; it is difficult to do so, because
+ * each {@link RelNode} needs to return a different set of fields after
+ * trimming.
+ *
+ *
+ * TODO: Change 2nd arg of the {@link #trimFields} method from BitSet to
+ * Mapping. Sometimes it helps the consumer if you return the columns in a
+ * particular order. For instance, it may avoid a project at the top of the tree
+ * just for reordering. Could ease the transition by writing methods that
+ * convert BitSet to Mapping and vice versa.
+ */
+public class HiveRelFieldTrimmer implements ReflectiveVisitor {
+ // ~ Static fields/initializers ---------------------------------------------
+
+ // ~ Instance fields --------------------------------------------------------
+
+ private final ReflectUtil.MethodDispatcher<TrimResult> trimFieldsDispatcher;
+
+ // ~ Constructors -----------------------------------------------------------
+
+ /**
+ * Creates a RelFieldTrimmer.
+ *
+ * @param validator
+ * Validator
+ */
+ public HiveRelFieldTrimmer(SqlValidator validator) {
+ Util.discard(validator); // may be useful one day
+ this.trimFieldsDispatcher = ReflectUtil.createMethodDispatcher(
+ TrimResult.class, this, "trimFields", RelNode.class, BitSet.class,
+ Set.class);
+ }
+
+ // ~ Methods ----------------------------------------------------------------
+
+ /**
+ * Trims unused fields from a relational expression.
+ *
+ *
+ * We presume that all fields of the relational expression are wanted by its
+ * consumer, so only trim fields that are not used within the tree.
+ *
+ * @param root
+ * Root node of relational expression
+ * @return Trimmed relational expression
+ */
+ public RelNode trim(RelNode root) {
+ final int fieldCount = root.getRowType().getFieldCount();
+ final BitSet fieldsUsed = BitSets.range(fieldCount);
+ final Set<RelDataTypeField> extraFields = Collections.emptySet();
+ final TrimResult trimResult = dispatchTrimFields(root, fieldsUsed,
+ extraFields);
+ if (!trimResult.right.isIdentity()) {
+ throw new IllegalArgumentException();
+ }
+ return trimResult.left;
+ }
+
+ /**
+ * Trims the fields of an input relational expression.
+ *
+ * @param rel
+ * Relational expression
+ * @param input
+ * Input relational expression, whose fields to trim
+ * @param fieldsUsed
+ * Bitmap of fields needed by the consumer
+ * @return New relational expression and its field mapping
+ */
+ protected TrimResult trimChild(RelNode rel, RelNode input, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ Util.discard(rel);
+ if (input.getClass().getName().endsWith("MedMdrClassExtentRel")) {
+ // MedMdrJoinRule cannot handle Join of Project of
+ // MedMdrClassExtentRel, only naked MedMdrClassExtentRel.
+ // So, disable trimming.
+ fieldsUsed = BitSets.range(input.getRowType().getFieldCount());
+ }
+ return dispatchTrimFields(input, fieldsUsed, extraFields);
+ }
+
+ /**
+ * Trims a child relational expression, then adds back a dummy project to
+ * restore the fields that were removed.
+ *
+ *
+ * Sounds pointless? It causes unused fields to be removed further down the
+ * tree (towards the leaves), but it ensures that the consuming relational
+ * expression continues to see the same fields.
+ *
+ * @param rel
+ * Relational expression
+ * @param input
+ * Input relational expression, whose fields to trim
+ * @param fieldsUsed
+ * Bitmap of fields needed by the consumer
+ * @return New relational expression and its field mapping
+ */
+ protected TrimResult trimChildRestore(RelNode rel, RelNode input,
+ BitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
+ TrimResult trimResult = trimChild(rel, input, fieldsUsed, extraFields);
+ if (trimResult.right.isIdentity()) {
+ return trimResult;
+ }
+ final RelDataType rowType = input.getRowType();
+ List<RelDataTypeField> fieldList = rowType.getFieldList();
+ final List<RexNode> exprList = new ArrayList<RexNode>();
+ final List<String> nameList = rowType.getFieldNames();
+ RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
+ assert trimResult.right.getSourceCount() == fieldList.size();
+ for (int i = 0; i < fieldList.size(); i++) {
+ int source = trimResult.right.getTargetOpt(i);
+ RelDataTypeField field = fieldList.get(i);
+ exprList.add(source < 0 ? rexBuilder.makeZeroLiteral(field.getType())
+ : rexBuilder.makeInputRef(field.getType(), source));
+ }
+ RelNode project = CalcRel
+ .createProject(trimResult.left, exprList, nameList);
+ return new TrimResult(project, Mappings.createIdentity(fieldList.size()));
+ }
+
+ /**
+ * Invokes {@link #trimFields}, or the appropriate method for the type of the
+ * rel parameter, using multi-method dispatch.
+ *
+ * @param rel
+ * Relational expression
+ * @param fieldsUsed
+ * Bitmap of fields needed by the consumer
+ * @return New relational expression and its field mapping
+ */
+ protected final TrimResult dispatchTrimFields(RelNode rel, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final TrimResult trimResult = trimFieldsDispatcher.invoke(rel, fieldsUsed,
+ extraFields);
+ final RelNode newRel = trimResult.left;
+ final Mapping mapping = trimResult.right;
+ final int fieldCount = rel.getRowType().getFieldCount();
+ assert mapping.getSourceCount() == fieldCount : "source: "
+ + mapping.getSourceCount() + " != " + fieldCount;
+ final int newFieldCount = newRel.getRowType().getFieldCount();
+ assert mapping.getTargetCount() + extraFields.size() == newFieldCount : "target: "
+ + mapping.getTargetCount()
+ + " + "
+ + extraFields.size()
+ + " != "
+ + newFieldCount;
+ if (Bug.TODO_FIXED) {
+ assert newFieldCount > 0 : "rel has no fields after trim: " + rel;
+ }
+ if (newRel.equals(rel)) {
+ return new TrimResult(rel, mapping);
+ }
+ return trimResult;
+ }
+
+ /**
+ * Visit method, per {@link org.eigenbase.util.ReflectiveVisitor}.
+ *
+ *
+ * This method is invoked reflectively, so there may not be any apparent calls
+ * to it. The class (or derived classes) may contain overloads of this method
+ * with more specific types for the {@code rel} parameter.
+ *
+ *
+ * Returns a pair: the relational expression created, and the mapping between
+ * the original fields and the fields of the newly created relational
+ * expression.
+ *
+ * @param rel
+ * Relational expression
+ * @param fieldsUsed
+ * Fields needed by the consumer
+ * @return relational expression and mapping
+ */
+ public TrimResult trimFields(HiveRel rel, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ // We don't know how to trim this kind of relational expression, so give
+ // it back intact.
+ Util.discard(fieldsUsed);
+ return new TrimResult(rel, Mappings.createIdentity(rel.getRowType()
+ .getFieldCount()));
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, BitSet, Set)} for
+ * {@link HiveProjectRel} .
+ */
+ public TrimResult trimFields(HiveProjectRel project, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = project.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ final RelNode input = project.getChild();
+ final RelDataType inputRowType = input.getRowType();
+
+ // Which fields are required from the input?
+ BitSet inputFieldsUsed = new BitSet(inputRowType.getFieldCount());
+ final Set<RelDataTypeField> inputExtraFields = new LinkedHashSet<RelDataTypeField>(
+ extraFields);
+ RelOptUtil.InputFinder inputFinder = new RelOptUtil.InputFinder(
+ inputFieldsUsed, inputExtraFields);
+ for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
+ if (fieldsUsed.get(ord.i)) {
+ ord.e.accept(inputFinder);
+ }
+ }
+
+ // Create input with trimmed columns.
+ TrimResult trimResult = trimChild(project, input, inputFieldsUsed,
+ inputExtraFields);
+ RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing we can do.
+ if (newInput == input && fieldsUsed.cardinality() == fieldCount) {
+ return new TrimResult(project, Mappings.createIdentity(fieldCount));
+ }
+
+ // Some parts of the system can't handle rows with zero fields, so
+ // pretend that one field is used.
+ if (fieldsUsed.cardinality() == 0) {
+ final Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION,
+ fieldCount, 1);
+ final RexLiteral expr = project.getCluster().getRexBuilder()
+ .makeExactLiteral(BigDecimal.ZERO);
+ RelDataType newRowType = project
+ .getCluster()
+ .getTypeFactory()
+ .createStructType(Collections.singletonList(expr.getType()),
+ Collections.singletonList("DUMMY"));
+ HiveProjectRel newProject = new HiveProjectRel(project.getCluster(),
+ project.getCluster().traitSetOf(HiveRel.CONVENTION), newInput,
+ Collections.<RexNode> singletonList(expr), newRowType,
+ project.getFlags());
+ return new TrimResult(newProject, mapping);
+ }
+
+ // Build new project expressions, and populate the mapping.
+ List<RexNode> newProjectExprList = new ArrayList<RexNode>();
+ final RexVisitor<RexNode> shuttle = new RexPermuteInputsShuttle(
+ inputMapping, newInput);
+ final Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION,
+ fieldCount, fieldsUsed.cardinality());
+ for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
+ if (fieldsUsed.get(ord.i)) {
+ mapping.set(ord.i, newProjectExprList.size());
+ RexNode newProjectExpr = ord.e.accept(shuttle);
+ newProjectExprList.add(newProjectExpr);
+ }
+ }
+
+ final RelDataType newRowType = project.getCluster().getTypeFactory()
+ .createStructType(Mappings.apply3(mapping, rowType.getFieldList()));
+
+ final List<RelCollation> newCollations = RexUtil.apply(inputMapping,
+ project.getCollationList());
+
+ final RelNode newProject;
+ if (RemoveTrivialProjectRule.isIdentity(newProjectExprList, newRowType,
+ newInput.getRowType())) {
+ // The new project would be the identity. It is equivalent to return
+ // its child.
+ newProject = newInput;
+ } else {
+ newProject = new HiveProjectRel(project.getCluster(), project
+ .getCluster()
+ .traitSetOf(
+ newCollations.isEmpty() ? HiveRel.CONVENTION : newCollations
+ .get(0)), newInput, newProjectExprList, newRowType,
+ project.getFlags());
+ assert newProject.getClass() == project.getClass();
+ }
+ return new TrimResult(newProject, mapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, BitSet, Set)} for
+ * {@link HiveFilterRel}.
+ */
+ public TrimResult trimFields(HiveFilterRel filter, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = filter.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ final RexNode conditionExpr = filter.getCondition();
+ final RelNode input = filter.getChild();
+
+ // We use the fields used by the consumer, plus any fields used in the
+ // filter.
+ BitSet inputFieldsUsed = (BitSet) fieldsUsed.clone();
+ final Set<RelDataTypeField> inputExtraFields = new LinkedHashSet<RelDataTypeField>(
+ extraFields);
+ RelOptUtil.InputFinder inputFinder = new RelOptUtil.InputFinder(
+ inputFieldsUsed, inputExtraFields);
+ conditionExpr.accept(inputFinder);
+
+ // Create input with trimmed columns.
+ TrimResult trimResult = trimChild(filter, input, inputFieldsUsed,
+ inputExtraFields);
+ RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing we can do.
+ if (newInput == input && fieldsUsed.cardinality() == fieldCount) {
+ return new TrimResult(filter, Mappings.createIdentity(fieldCount));
+ }
+
+ // Build new project expressions, and populate the mapping.
+ final RexVisitor<RexNode> shuttle = new RexPermuteInputsShuttle(
+ inputMapping, newInput);
+ RexNode newConditionExpr = conditionExpr.accept(shuttle);
+
+ final HiveFilterRel newFilter = new HiveFilterRel(filter.getCluster(),
+ filter.getCluster().traitSetOf(HiveRel.CONVENTION), newInput,
+ newConditionExpr);
+ assert newFilter.getClass() == filter.getClass();
+
+ // The result has the same mapping as the input gave us. Sometimes we
+ // return fields that the consumer didn't ask for, because the filter
+ // needs them for its condition.
+ return new TrimResult(newFilter, inputMapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, BitSet, Set)} for {@link SortRel}.
+ */
+ public TrimResult trimFields(HiveSortRel sort, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = sort.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ final RelCollation collation = sort.getCollation();
+ final RelNode input = sort.getChild();
+
+ // We use the fields used by the consumer, plus any fields used as sort
+ // keys.
+ BitSet inputFieldsUsed = (BitSet) fieldsUsed.clone();
+ for (RelFieldCollation field : collation.getFieldCollations()) {
+ inputFieldsUsed.set(field.getFieldIndex());
+ }
+
+ // Create input with trimmed columns.
+ final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
+ TrimResult trimResult = trimChild(sort, input, inputFieldsUsed,
+ inputExtraFields);
+ RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing we can do.
+ if (newInput == input && inputMapping.isIdentity()
+ && fieldsUsed.cardinality() == fieldCount) {
+ return new TrimResult(sort, Mappings.createIdentity(fieldCount));
+ }
+
+ final SortRel newSort = sort.copy(sort.getTraitSet(), newInput,
+ RexUtil.apply(inputMapping, collation));
+ assert newSort.getClass() == sort.getClass();
+
+ // The result has the same mapping as the input gave us. Sometimes we
+ // return fields that the consumer didn't ask for, because the filter
+ // needs them for its condition.
+ return new TrimResult(newSort, inputMapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, BitSet, Set)} for {@link JoinRel}.
+ *
+ * Have to do this because of the way ReflectUtil works: if there is an
+ * exact match, things are fine; otherwise it doesn't allow any ambiguity (in
+ * this case between a superclass (JoinRelBase) and an interface (HiveRel)).
+ */
+ private TrimResult _trimFields(JoinRelBase join, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = join.getRowType();
+ final int fieldCount = join.getSystemFieldList().size() +
+ join.getLeft().getRowType().getFieldCount() +
+ join.getRight().getRowType().getFieldCount();
+ final RexNode conditionExpr = join.getCondition();
+ final int systemFieldCount = join.getSystemFieldList().size();
+
+ // Add in fields used in the condition.
+ BitSet fieldsUsedPlus = (BitSet) fieldsUsed.clone();
+ final Set<RelDataTypeField> combinedInputExtraFields = new LinkedHashSet<RelDataTypeField>(
+ extraFields);
+ RelOptUtil.InputFinder inputFinder = new RelOptUtil.InputFinder(
+ fieldsUsedPlus, combinedInputExtraFields);
+ conditionExpr.accept(inputFinder);
+
+ // If no system fields are used, we can remove them.
+ int systemFieldUsedCount = 0;
+ for (int i = 0; i < systemFieldCount; ++i) {
+ if (fieldsUsed.get(i)) {
+ ++systemFieldUsedCount;
+ }
+ }
+ final int newSystemFieldCount;
+ if (systemFieldUsedCount == 0) {
+ newSystemFieldCount = 0;
+ } else {
+ newSystemFieldCount = systemFieldCount;
+ }
+
+ int offset = systemFieldCount;
+ int changeCount = 0;
+ int newFieldCount = newSystemFieldCount;
+ List<RelNode> newInputs = new ArrayList<RelNode>(2);
+ List<Mapping> inputMappings = new ArrayList<Mapping>();
+ List<Integer> inputExtraFieldCounts = new ArrayList<Integer>();
+ for (RelNode input : join.getInputs()) {
+ final RelDataType inputRowType = input.getRowType();
+ final int inputFieldCount = inputRowType.getFieldCount();
+
+ // Compute required mapping.
+ BitSet inputFieldsUsed = new BitSet(inputFieldCount);
+ for (int bit : BitSets.toIter(fieldsUsedPlus)) {
+ if (bit >= offset && bit < offset + inputFieldCount) {
+ inputFieldsUsed.set(bit - offset);
+ }
+ }
+
+ // If there are system fields, we automatically use the
+ // corresponding field in each input.
+ if (newSystemFieldCount > 0) {
+ // calling with newSystemFieldCount == 0 should be safe but hits
+ // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6222207
+ inputFieldsUsed.set(0, newSystemFieldCount);
+ }
+
+ // FIXME: We ought to collect extra fields for each input
+ // individually. For now, we assume that just one input has
+ // on-demand fields.
+ Set<RelDataTypeField> inputExtraFields = RelDataTypeImpl.extra(rowType) == null ? Collections
+ .<RelDataTypeField> emptySet() : combinedInputExtraFields;
+ inputExtraFieldCounts.add(inputExtraFields.size());
+
+ // A cross join may not reference anything from one of its inputs.
+ // Ex: select R1.x from r1 join r2;
+ if (inputExtraFields.size() == 0 && inputFieldsUsed.isEmpty()) {
+ inputFieldsUsed.set(0);
+ }
+
+ TrimResult trimResult = trimChild(join, input, inputFieldsUsed,
+ inputExtraFields);
+ newInputs.add(trimResult.left);
+ if (trimResult.left != input) {
+ ++changeCount;
+ }
+
+ final Mapping inputMapping = trimResult.right;
+ inputMappings.add(inputMapping);
+
+ // Move offset to point to start of next input.
+ offset += inputFieldCount;
+ newFieldCount += inputMapping.getTargetCount() + inputExtraFields.size();
+ }
+
+ Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION,
+ fieldCount, newFieldCount);
+ for (int i = 0; i < newSystemFieldCount; ++i) {
+ mapping.set(i, i);
+ }
+ offset = systemFieldCount;
+ int newOffset = newSystemFieldCount;
+ for (int i = 0; i < inputMappings.size(); i++) {
+ Mapping inputMapping = inputMappings.get(i);
+ for (IntPair pair : inputMapping) {
+ mapping.set(pair.source + offset, pair.target + newOffset);
+ }
+ offset += inputMapping.getSourceCount();
+ newOffset += inputMapping.getTargetCount() + inputExtraFieldCounts.get(i);
+ }
+
+ if (changeCount == 0 && mapping.isIdentity()) {
+ return new TrimResult(join, Mappings.createIdentity(fieldCount));
+ }
+
+ // Build new join.
+ final RexVisitor<RexNode> shuttle = new RexPermuteInputsShuttle(mapping,
+ newInputs.get(0), newInputs.get(1));
+ RexNode newConditionExpr = conditionExpr.accept(shuttle);
+
+ final JoinRelBase newJoin = join.copy(join.getTraitSet(), newConditionExpr,
+ newInputs.get(0), newInputs.get(1), join.getJoinType(), false);
+
+ /*
+ * For SemiJoins only map fields from the left-side
+ */
+ if ( join instanceof SemiJoinRel ) {
+ Mapping inputMapping = inputMappings.get(0);
+ mapping = Mappings.create(MappingType.INVERSE_SURJECTION,
+ join.getRowType().getFieldCount(), newSystemFieldCount + inputMapping.getTargetCount());
+ for (int i = 0; i < newSystemFieldCount; ++i) {
+ mapping.set(i, i);
+ }
+ offset = systemFieldCount;
+ newOffset = newSystemFieldCount;
+ for (IntPair pair : inputMapping) {
+ mapping.set(pair.source + offset, pair.target + newOffset);
+ }
+ }
+
+ return new TrimResult(newJoin, mapping);
+ }
+
+ public TrimResult trimFields(HiveJoinRel join, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ return _trimFields(join, fieldsUsed, extraFields);
+ }
+
+ public TrimResult trimFields(SemiJoinRel join, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ return _trimFields(join, fieldsUsed, extraFields);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, BitSet, Set)} for {@link SetOpRel}
+ * (including UNION and UNION ALL).
+ */
+ public TrimResult trimFields(HiveUnionRel setOp, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = setOp.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ int changeCount = 0;
+
+ // Fennel abhors an empty row type, so pretend that the parent rel
+ // wants the last field. (The last field is the least likely to be a
+ // system field.)
+ if (fieldsUsed.isEmpty()) {
+ fieldsUsed.set(rowType.getFieldCount() - 1);
+ }
+
+ // Compute the desired field mapping. Give the consumer the fields they
+ // want, in the order that they appear in the bitset.
+ final Mapping mapping = createMapping(fieldsUsed, fieldCount);
+
+ // Create input with trimmed columns.
+ final List<RelNode> newInputs = new ArrayList<RelNode>();
+ for (RelNode input : setOp.getInputs()) {
+ TrimResult trimResult = trimChild(setOp, input, fieldsUsed, extraFields);
+ RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ // We want "mapping", the input gave us "inputMapping", compute
+ // "remaining" mapping.
+ // | | |
+ // |---------------- mapping ---------->|
+ // |-- inputMapping -->| |
+ // | |-- remaining -->|
+ //
+ // For instance, suppose we have columns [a, b, c, d],
+ // the consumer asked for mapping = [b, d],
+ // and the transformed input has columns inputMapping = [d, a, b].
+ // remaining will permute [b, d] to [d, a, b].
+ Mapping remaining = Mappings.divide(mapping, inputMapping);
+
+ // Create a projection; does nothing if remaining is identity.
+ newInput = HiveProjectRel.projectMapping(newInput, remaining, null);
+
+ if (input != newInput) {
+ ++changeCount;
+ }
+ newInputs.add(newInput);
+ }
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing to do.
+ if (changeCount == 0 && mapping.isIdentity()) {
+ return new TrimResult(setOp, mapping);
+ }
+
+ RelNode newSetOp = setOp.copy(setOp.getTraitSet(), newInputs, true);
+ return new TrimResult(newSetOp, mapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, BitSet, Set)} for
+ * {@link AggregateRel}.
+ *
+ * @throws InvalidRelException
+ */
+ public TrimResult trimFields(HiveAggregateRel aggregate, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) throws InvalidRelException {
+ // Fields:
+ //
+ // | sys fields | group fields | agg functions |
+ //
+ // Two kinds of trimming:
+ //
+ // 1. If agg rel has system fields but none of these are used, create an
+ // agg rel with no system fields.
+ //
+ // 2. If aggregate functions are not used, remove them.
+ //
+ // But grouping fields stay, even if they are not used.
+
+ final RelDataType rowType = aggregate.getRowType();
+
+ // Compute which input fields are used.
+ BitSet inputFieldsUsed = new BitSet();
+ // 1. group fields are always used
+ for (int i : BitSets.toIter(aggregate.getGroupSet())) {
+ inputFieldsUsed.set(i);
+ }
+ // 2. agg functions
+ for (AggregateCall aggCall : aggregate.getAggCallList()) {
+ for (int i : aggCall.getArgList()) {
+ inputFieldsUsed.set(i);
+ }
+ }
+
+ // Create input with trimmed columns.
+ final RelNode input = aggregate.getInput(0);
+ final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
+ final TrimResult trimResult = trimChild(aggregate, input, inputFieldsUsed,
+ inputExtraFields);
+ final RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing to do.
+ if (input == newInput
+ && fieldsUsed.equals(BitSets.range(rowType.getFieldCount()))) {
+ return new TrimResult(aggregate, Mappings.createIdentity(rowType
+ .getFieldCount()));
+ }
+
+ // Which agg calls are used by our consumer?
+ final int groupCount = aggregate.getGroupSet().cardinality();
+ int j = groupCount;
+ int usedAggCallCount = 0;
+ for (int i = 0; i < aggregate.getAggCallList().size(); i++) {
+ if (fieldsUsed.get(j++)) {
+ ++usedAggCallCount;
+ }
+ }
+
+ // Offset due to the number of system fields having changed.
+ Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION,
+ rowType.getFieldCount(), groupCount + usedAggCallCount);
+
+ final BitSet newGroupSet = Mappings.apply(inputMapping,
+ aggregate.getGroupSet());
+
+ // Populate mapping of where to find the fields. System and grouping
+ // fields first.
+ for (IntPair pair : inputMapping) {
+ if (pair.source < groupCount) {
+ mapping.set(pair.source, pair.target);
+ }
+ }
+
+ // Now create new agg calls, and populate mapping for them.
+ final List<AggregateCall> newAggCallList = new ArrayList<AggregateCall>();
+ j = groupCount;
+ for (AggregateCall aggCall : aggregate.getAggCallList()) {
+ if (fieldsUsed.get(j)) {
+ AggregateCall newAggCall = new AggregateCall(aggCall.getAggregation(),
+ aggCall.isDistinct(), Mappings.apply2(inputMapping,
+ aggCall.getArgList()), aggCall.getType(), aggCall.getName());
+ if (newAggCall.equals(aggCall)) {
+ newAggCall = aggCall; // immutable -> canonize to save space
+ }
+ mapping.set(j, groupCount + newAggCallList.size());
+ newAggCallList.add(newAggCall);
+ }
+ ++j;
+ }
+
+ RelNode newAggregate = new HiveAggregateRel(aggregate.getCluster(),
+ aggregate.getCluster().traitSetOf(HiveRel.CONVENTION), newInput,
+ newGroupSet, newAggCallList);
+
+ assert newAggregate.getClass() == aggregate.getClass();
+
+ return new TrimResult(newAggregate, mapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, BitSet, Set)} for
+ * {@link TableModificationRel}.
+ */
+ public TrimResult trimFields(TableModificationRel modifier,
+ BitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
+ // Ignore what consumer wants. We always project all columns.
+ Util.discard(fieldsUsed);
+
+ final RelDataType rowType = modifier.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ RelNode input = modifier.getChild();
+
+ // We want all fields from the child.
+ final int inputFieldCount = input.getRowType().getFieldCount();
+ BitSet inputFieldsUsed = BitSets.range(inputFieldCount);
+
+ // Create input with trimmed columns.
+ final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
+ TrimResult trimResult = trimChild(modifier, input, inputFieldsUsed,
+ inputExtraFields);
+ RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+ if (!inputMapping.isIdentity()) {
+ // We asked for all fields. Can't believe that the child decided
+ // to permute them!
+ throw Util.newInternal("Expected identity mapping, got " + inputMapping);
+ }
+
+ TableModificationRel newModifier = modifier;
+ if (newInput != input) {
+ newModifier = modifier.copy(modifier.getTraitSet(),
+ Collections.singletonList(newInput));
+ }
+ assert newModifier.getClass() == modifier.getClass();
+
+ // Always project all fields.
+ Mapping mapping = Mappings.createIdentity(fieldCount);
+ return new TrimResult(newModifier, mapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, BitSet, Set)} for
+ * {@link TableFunctionRel}.
+ */
+ public TrimResult trimFields(TableFunctionRel tabFun, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = tabFun.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ List<RelNode> newInputs = new ArrayList<RelNode>();
+
+ for (RelNode input : tabFun.getInputs()) {
+ final int inputFieldCount = input.getRowType().getFieldCount();
+ BitSet inputFieldsUsed = BitSets.range(inputFieldCount);
+
+ // Create input with trimmed columns.
+ final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
+ TrimResult trimResult = trimChildRestore(tabFun, input, inputFieldsUsed,
+ inputExtraFields);
+ assert trimResult.right.isIdentity();
+ newInputs.add(trimResult.left);
+ }
+
+ TableFunctionRel newTabFun = tabFun;
+ if (!tabFun.getInputs().equals(newInputs)) {
+ newTabFun = tabFun.copy(tabFun.getTraitSet(), newInputs);
+ }
+ assert newTabFun.getClass() == tabFun.getClass();
+
+ // Always project all fields.
+ Mapping mapping = Mappings.createIdentity(fieldCount);
+ return new TrimResult(newTabFun, mapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, BitSet, Set)} for
+ * {@link org.eigenbase.rel.ValuesRel}.
+ */
+ public TrimResult trimFields(ValuesRel values, BitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = values.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+
+ // If they are asking for no fields, we can't give them what they want,
+ // because zero-column records are illegal. Give them the last field,
+ // which is unlikely to be a system field.
+ if (fieldsUsed.isEmpty()) {
+ fieldsUsed = BitSets.range(fieldCount - 1, fieldCount);
+ }
+
+ // If all fields are used, return unchanged.
+ if (fieldsUsed.equals(BitSets.range(fieldCount))) {
+ Mapping mapping = Mappings.createIdentity(fieldCount);
+ return new TrimResult(values, mapping);
+ }
+
+ List<List<RexLiteral>> newTuples = new ArrayList<List<RexLiteral>>();
+ for (List<RexLiteral> tuple : values.getTuples()) {
+ List<RexLiteral> newTuple = new ArrayList<RexLiteral>();
+ for (int field : BitSets.toIter(fieldsUsed)) {
+ newTuple.add(tuple.get(field));
+ }
+ newTuples.add(newTuple);
+ }
+
+ final Mapping mapping = createMapping(fieldsUsed, fieldCount);
+ RelDataType newRowType = values.getCluster().getTypeFactory()
+ .createStructType(Mappings.apply3(mapping, rowType.getFieldList()));
+ final ValuesRel newValues = new ValuesRel(values.getCluster(), newRowType,
+ newTuples);
+ return new TrimResult(newValues, mapping);
+ }
+
+ private Mapping createMapping(BitSet fieldsUsed, int fieldCount) {
+ final Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION,
+ fieldCount, fieldsUsed.cardinality());
+ int i = 0;
+ for (int field : BitSets.toIter(fieldsUsed)) {
+ mapping.set(field, i++);
+ }
+ return mapping;
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, BitSet, Set)} for
+ * {@link org.eigenbase.rel.TableAccessRel}.
+ */
+ public TrimResult trimFields(final HiveTableScanRel tableAccessRel,
+ BitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
+ final int fieldCount = tableAccessRel.getRowType().getFieldCount();
+ if (fieldsUsed.isEmpty() || (fieldsUsed.equals(BitSets.range(fieldCount)) && extraFields.isEmpty())) {
+ return trimFields((HiveRel) tableAccessRel, fieldsUsed, extraFields);
+ }
+ final RelNode _newTableAccessRel = tableAccessRel.project(fieldsUsed,
+ extraFields);
+ final RelNode newTableAccessRel = HiveProjectRel.DEFAULT_PROJECT_FACTORY
+ .createProject(tableAccessRel, _newTableAccessRel.getChildExps(),
+ _newTableAccessRel.getRowType().getFieldNames());
+ final Mapping mapping = createMapping(fieldsUsed, fieldCount);
+ return new TrimResult(newTableAccessRel, mapping);
+ }
+
+ // ~ Inner Classes ----------------------------------------------------------
+
+ /**
+ * Result of an attempt to trim columns from a relational expression.
+ *
+ *
+ * The mapping describes where to find the columns wanted by the parent of the
+ * current relational expression.
+ *
+ *
+ * The mapping is a {@link org.eigenbase.util.mapping.Mappings.SourceMapping},
+ * which means that no column can be used more than once, and some columns are
+ * not used. {@code columnsUsed.getSource(i)} returns the source of the i'th
+ * output field.
+ *
+ *
+ * For example, consider the mapping for a relational expression that has 4
+ * output columns but only two are being used. The mapping {2 → 1, 3
+ * → 0} would give the following behavior:
+ *
+ *
+ *
+ * - columnsUsed.getSourceCount() returns 4
+ * - columnsUsed.getTargetCount() returns 2
+ * - columnsUsed.getSource(0) returns 3
+ * - columnsUsed.getSource(1) returns 2
+ * - columnsUsed.getSource(2) throws IndexOutOfBounds
+ * - columnsUsed.getTargetOpt(3) returns 0
+ * - columnsUsed.getTargetOpt(0) returns -1
+ *
+ */
+ protected static class TrimResult extends Pair<RelNode, Mapping> {
+ /**
+ * Creates a TrimResult.
+ *
+ * @param left
+ * New relational expression
+ * @param right
+ * Mapping of fields onto original fields
+ */
+ public TrimResult(RelNode left, Mapping right) {
+ super(left, right);
+ }
+ }
+}
+
+// End RelFieldTrimmer.java
+
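A minimal sketch (assumption: the SqlValidator argument may be null, since the constructor discards it) of trimming unused columns from a finished plan `root`:

    HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null);
    RelNode slimmedRoot = fieldTrimmer.trim(root);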
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java
new file mode 100644
index 0000000..d9d94f6
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java
@@ -0,0 +1,197 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexLiteral;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexVisitorImpl;
+import org.eigenbase.sql.fun.SqlStdOperatorTable;
+import org.eigenbase.util.Pair;
+
+public class PartitionPruner {
+
+ /**
+ * Breaks the predicate into 2 pieces. The first piece is the expressions that
+ * only contain partition columns and can be used for Partition Pruning; the
+ * second piece is the predicates that are left.
+ *
+ * @param cluster
+ * @param hiveTable
+ * @param predicate
+ * @return a Pair of expressions, each of which may be null. The 1st predicate
+ * is expressions that only contain partition columns; the 2nd
+ * predicate contains the remaining predicates.
+ */
+ public static Pair<RexNode, RexNode> extractPartitionPredicates(
+ RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) {
+ RexNode partitionPruningPred = predicate
+ .accept(new ExtractPartPruningPredicate(cluster, hiveTable));
+ RexNode remainingPred = predicate.accept(new ExtractRemainingPredicate(
+ cluster, partitionPruningPred));
+ return new Pair<RexNode, RexNode>(partitionPruningPred, remainingPred);
+ }
+
+ public static class ExtractPartPruningPredicate extends
+ RexVisitorImpl<RexNode> {
+
+ final RelOptHiveTable hiveTable;
+ final RelDataType rType;
+ final Set