diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java index 9e04284..c53d6ae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java @@ -17,15 +17,21 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; +import java.util.LinkedList; import java.util.List; +import java.util.Set; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; @@ -100,4 +106,35 @@ public double getRows() { return ((RelOptHiveTable) table).getColStat(projIndxLst); } + @Override + public RelNode project(ImmutableBitSet fieldsUsed, Set extraFields, + RelFactories.ProjectFactory projectFactory) { + + // 1. If the schema is the same then bail out + final int fieldCount = getRowType().getFieldCount(); + if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount)) && extraFields.isEmpty()) { + return this; + } + + // 2. Make sure there is no dynamic addition of virtual cols + if (extraFields != null && !extraFields.isEmpty()) { + throw new RuntimeException("Hive TS does not support adding virtual columns dynamically"); + } + + // 3. Create new TS schema that is a subset of original + final List fields = getRowType().getFieldList(); + List fieldTypes = new LinkedList(); + List fieldNames = new LinkedList(); + for (int i : fieldsUsed) { + RelDataTypeField field = fields.get(i); + fieldTypes.add(field.getType()); + fieldNames.add(field.getName()); + } + + // 4. Build new TS + HiveTableScan newHT = copy(getCluster().getTypeFactory().createStructType(fieldTypes, + fieldNames)); + + return newHT; + } } \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 986213d..525fdae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -58,8 +58,10 @@ import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; @@ -606,13 +608,39 @@ Operator getOptimizedHiveOPDag() throws SemanticException { throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage()); } + RelNode modifiedOptimizedOptiqPlan = introduceProjectIfNeeded(optimizedOptiqPlan); + Operator hiveRoot = new HiveOpConverter(topOps, HiveOpConverter.getAggOPMode(conf), - conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")).convert(optimizedOptiqPlan); + conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")).convert(modifiedOptimizedOptiqPlan); RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB()); opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR)); return genFileSinkPlan(getQB().getParseInfo().getClauseNames().iterator().next(), getQB(), hiveRoot); } + private RelNode introduceProjectIfNeeded(RelNode optimizedOptiqPlan) + throws CalciteSemanticException { + RelNode parent = null; + RelNode input = optimizedOptiqPlan; + RelNode newRoot = optimizedOptiqPlan; + + while (!(input instanceof Project) && (input instanceof Sort)) { + parent = input; + input = input.getInput(0); + } + + if (!(input instanceof Project)) { + HiveProject hpRel = HiveProject.create(input, + HiveCalciteUtil.getProjsFromBelowAsInputRef(input), input.getRowType().getFieldNames()); + if (input == optimizedOptiqPlan) { + newRoot = hpRel; + } else { + parent.replaceInput(0, hpRel); + } + } + + return newRoot; + } + /*** * Unwraps Calcite Invocation exceptions coming meta data provider chain and * obtains the real cause.