diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java index ebab2cf..7c2b0cd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java @@ -27,9 +27,11 @@ import java.util.Map.Entry; import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.eigenbase.rel.RelFactories.ProjectFactory; import org.eigenbase.rel.RelNode; +import org.eigenbase.rel.SortRel; import org.eigenbase.relopt.RelOptUtil; import org.eigenbase.relopt.RelOptUtil.InputReferencedVisitor; import org.eigenbase.reltype.RelDataTypeField; @@ -485,4 +487,43 @@ private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(HiveJoinRel return jlpi; } } + + public static boolean limitRelNode(RelNode rel) { + if ((rel instanceof SortRel) && ((SortRel) rel).getCollation().getFieldCollations().isEmpty()) + return true; + + return false; + } + + public static boolean orderRelNode(RelNode rel) { + if ((rel instanceof SortRel) && !((SortRel) rel).getCollation().getFieldCollations().isEmpty()) + return true; + + return false; + } + + /** + * Get top level select starting from root. Assumption here is root can only + * be SortRel & ProjectRel. Also the top project should be at most 2 levels + * below Sortrel; i.e SortRel(Limit)-SortRel(OB)-Select + * + * @param rootRel + * @return + */ + public static Pair getTopLevelSelect(final RelNode rootRel) { + RelNode tmpRel = rootRel; + RelNode parentOforiginalProjRel = rootRel; + HiveProjectRel originalProjRel = null; + + while (tmpRel != null) { + if (tmpRel instanceof HiveProjectRel) { + originalProjRel = (HiveProjectRel) tmpRel; + break; + } + parentOforiginalProjRel = tmpRel; + tmpRel = tmpRel.getInput(0); + } + + return (new Pair(parentOforiginalProjRel, originalProjRel)); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java index c643aa4..7b434ea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java @@ -34,7 +34,6 @@ import org.eigenbase.relopt.RelOptCluster; import org.eigenbase.relopt.RelOptCost; import org.eigenbase.relopt.RelOptPlanner; -import org.eigenbase.relopt.RelOptRule; import org.eigenbase.relopt.RelTraitSet; import org.eigenbase.reltype.RelDataType; import org.eigenbase.reltype.RelDataTypeField; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java index 82db7b1..f85363d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveSortRel.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; +import java.util.Map; + import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; import org.eigenbase.rel.RelCollation; import org.eigenbase.rel.RelFactories; @@ -26,10 +28,21 @@ import org.eigenbase.relopt.RelTraitSet; import org.eigenbase.rex.RexNode; +import com.google.common.collect.ImmutableMap; + public class HiveSortRel extends SortRel implements HiveRel { public static final HiveSortRelFactory HIVE_SORT_REL_FACTORY = new HiveSortRelFactory(); + // NOTE: this is to work around Hive Optiq Limitations w.r.t OB. + // 1. Optiq can not accept expressions in OB; instead it needs to be expressed + // as VC in input Select. + // 2. Hive can not preserve ordering through select boundaries. + // 3. This map is used for outermost OB to migrate the VC corresponding OB + // expressions from input select. + // 4. This is used by ASTConverter after we are done with Optiq Planning + private ImmutableMap mapOfInputRefToRexCall; + public HiveSortRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, RelCollation collation, RexNode offset, RexNode fetch) { super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation, @@ -49,6 +62,14 @@ public RexNode getFetchExpr() { return fetch; } + public void setInputRefToCallMap(ImmutableMap refToCall) { + this.mapOfInputRefToRexCall = refToCall; + } + + public Map getInputRefToCallMap() { + return this.mapOfInputRefToRexCall; + } + @Override public void implement(Implementor implementor) { } @@ -56,8 +77,8 @@ public void implement(Implementor implementor) { private static class HiveSortRelFactory implements RelFactories.SortFactory { @Override - public RelNode createSort(RelTraitSet traits, RelNode child, - RelCollation collation, RexNode offset, RexNode fetch) { + public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation, + RexNode offset, RexNode fetch) { return new HiveSortRel(child.getCluster(), traits, child, collation, offset, fetch); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java index a9eab4c..f5a704f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java @@ -21,7 +21,7 @@ import java.util.ArrayList; import java.util.LinkedList; import java.util.List; -import java.util.concurrent.atomic.AtomicLong; +import java.util.Map; import net.hydromatic.optiq.util.BitSets; @@ -64,39 +64,34 @@ public class ASTConverter { - RelNode root; - HiveAST hiveAST; - RelNode from; - FilterRelBase where; - AggregateRelBase groupBy; - FilterRelBase having; - ProjectRelBase select; - SortRel order; + private RelNode root; + private HiveAST hiveAST; + private RelNode from; + private FilterRelBase where; + private AggregateRelBase groupBy; + private FilterRelBase having; + private ProjectRelBase select; + private SortRel order; + private SortRel limit; - Schema schema; + private Schema schema; - ASTConverter(RelNode root) { + private long derivedTableCount; + + ASTConverter(RelNode root, long dtCounterInitVal) { this.root = root; hiveAST = new HiveAST(); + this.derivedTableCount = dtCounterInitVal; } public static ASTNode convert(final RelNode relNode, List resultSchema) throws OptiqSemanticException { - SortRel sortrel = null; - RelNode root = DerivedTableInjector.convertOpTree(relNode, resultSchema); - - if (root instanceof SortRel) { - sortrel = (SortRel) root; - root = sortrel.getChild(); - if (!(root instanceof ProjectRelBase)) - throw new RuntimeException("Child of root sort node is not a project"); - } - - ASTConverter c = new ASTConverter(root); - return c.convert(sortrel); + RelNode root = PlanModifierForASTConv.convertOpTree(relNode, resultSchema); + ASTConverter c = new ASTConverter(root, 0); + return c.convert(); } - public ASTNode convert(SortRel sortrel) { + private ASTNode convert() { /* * 1. Walk RelNode Graph; note from, where, gBy.. nodes. */ @@ -167,35 +162,67 @@ public ASTNode convert(SortRel sortrel) { * to its src/from. Hence the need to pass in sortRel for each block from * its parent. */ - if (sortrel != null) { - HiveSortRel hiveSort = (HiveSortRel) sortrel; + convertOBToASTNode((HiveSortRel) order); + + // 8. Limit + convertLimitToASTNode((HiveSortRel) limit); + + return hiveAST.getAST(); + } + + private void convertLimitToASTNode(HiveSortRel limit) { + if (limit != null) { + HiveSortRel hiveLimit = (HiveSortRel) limit; + RexNode limitExpr = hiveLimit.getFetchExpr(); + if (limitExpr != null) { + Object val = ((RexLiteral) limitExpr).getValue2(); + hiveAST.limit = ASTBuilder.limit(val); + } + } + } + + private void convertOBToASTNode(HiveSortRel order) { + if (order != null) { + HiveSortRel hiveSort = (HiveSortRel) order; if (!hiveSort.getCollation().getFieldCollations().isEmpty()) { + // 1 Add order by token ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); - schema = new Schema((HiveSortRel) sortrel); + + schema = new Schema((HiveSortRel) hiveSort); + Map obRefToCallMap = hiveSort.getInputRefToCallMap(); + RexNode obExpr; + ASTNode astCol; for (RelFieldCollation c : hiveSort.getCollation().getFieldCollations()) { - ColumnInfo cI = schema.get(c.getFieldIndex()); - /* - * The RowResolver setup for Select drops Table associations. So setup - * ASTNode on unqualified name. - */ - ASTNode astCol = ASTBuilder.unqualifiedName(cI.column); - ASTNode astNode = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder + + // 2 Add Direction token + ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); - astNode.addChild(astCol); - orderAst.addChild(astNode); + + // 3 Convert OB expr (OB Expr is usually an input ref except for top + // level OB; top level OB will have RexCall kept in a map.) + obExpr = null; + if (obRefToCallMap != null) + obExpr = obRefToCallMap.get(c.getFieldIndex()); + + if (obExpr != null) { + astCol = obExpr.accept(new RexVisitor(schema)); + } else { + ColumnInfo cI = schema.get(c.getFieldIndex()); + /* + * The RowResolver setup for Select drops Table associations. So + * setup ASTNode on unqualified name. + */ + astCol = ASTBuilder.unqualifiedName(cI.column); + } + + // 4 buildup the ob expr AST + directionAST.addChild(astCol); + orderAst.addChild(directionAST); } hiveAST.order = orderAst; } - RexNode limitExpr = hiveSort.getFetchExpr(); - if (limitExpr != null) { - Object val = ((RexLiteral) limitExpr).getValue2(); - hiveAST.limit = ASTBuilder.limit(val); - } - } - - return hiveAST.getAST(); } private Schema getRowSchema(String tblAlias) { @@ -224,20 +251,20 @@ private QueryBlockInfo convertSource(RelNode r) { RelNode leftInput = ((UnionRelBase) r).getInput(0); RelNode rightInput = ((UnionRelBase) r).getInput(1); - ASTConverter leftConv = new ASTConverter(leftInput); - ASTConverter rightConv = new ASTConverter(rightInput); - ASTNode leftAST = leftConv.convert((SortRel) null); - ASTNode rightAST = rightConv.convert((SortRel) null); + ASTConverter leftConv = new ASTConverter(leftInput, this.derivedTableCount); + ASTConverter rightConv = new ASTConverter(rightInput, this.derivedTableCount); + ASTNode leftAST = leftConv.convert(); + ASTNode rightAST = rightConv.convert(); ASTNode unionAST = getUnionAllAST(leftAST, rightAST); - String sqAlias = ASTConverter.nextAlias(); + String sqAlias = nextAlias(); ast = ASTBuilder.subQuery(unionAST, sqAlias); s = new Schema((UnionRelBase) r, sqAlias); } else { - ASTConverter src = new ASTConverter(r); - ASTNode srcAST = src.convert(order); - String sqAlias = ASTConverter.nextAlias(); + ASTConverter src = new ASTConverter(r, this.derivedTableCount); + ASTNode srcAST = src.convert(); + String sqAlias = nextAlias(); s = src.getRowSchema(sqAlias); ast = ASTBuilder.subQuery(srcAST, sqAlias); } @@ -279,7 +306,15 @@ public void visit(RelNode node, int ordinal, RelNode parent) { } else if (node instanceof AggregateRelBase) { ASTConverter.this.groupBy = (AggregateRelBase) node; } else if (node instanceof SortRel) { - ASTConverter.this.order = (SortRel) node; + if (ASTConverter.this.select != null) { + ASTConverter.this.from = node; + } else { + SortRel hiveSortRel = (SortRel) node; + if (hiveSortRel.getCollation().getFieldCollations().isEmpty()) + ASTConverter.this.limit = hiveSortRel; + else + ASTConverter.this.order = hiveSortRel; + } } /* * once the source node is reached; stop traversal for this QB @@ -312,7 +347,12 @@ public ASTNode visitInputRef(RexInputRef inputRef) { if (cI.agg != null) { return (ASTNode) ParseDriver.adaptor.dupTree(cI.agg); } - return ASTBuilder.qualifiedName(cI.table, cI.column); + + if (cI.table == null || cI.table.isEmpty()) + return ASTBuilder.unqualifiedName(cI.column); + else + return ASTBuilder.qualifiedName(cI.table, cI.column); + } @Override @@ -480,7 +520,7 @@ public QueryBlockInfo(Schema schema, ASTNode ast) { private static final long serialVersionUID = 1L; Schema(TableAccessRelBase scan) { - String tabName = ((RelOptHiveTable)scan.getTable()).getTableAlias(); + String tabName = ((RelOptHiveTable) scan.getTable()).getTableAlias(); for (RelDataTypeField field : scan.getRowType().getFieldList()) { add(new ColumnInfo(tabName, field.getName())); } @@ -571,12 +611,12 @@ public Schema(HiveSortRel order) { } } - static String nextAlias() { - return String.format("$hdt$_%d", derivedTableCounter.getAndIncrement()); + private String nextAlias() { + String tabAlias = String.format("$hdt$_%d", derivedTableCount); + derivedTableCount++; + return tabAlias; } - private static AtomicLong derivedTableCounter = new AtomicLong(0); - static class HiveAST { ASTNode from; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/DerivedTableInjector.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java similarity index 72% rename from ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/DerivedTableInjector.java rename to ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java index a655174..3d90ae7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/DerivedTableInjector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.optimizer.optiq.translator; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil; @@ -32,21 +34,24 @@ import org.eigenbase.rel.JoinRelBase; import org.eigenbase.rel.OneRowRelBase; import org.eigenbase.rel.ProjectRelBase; +import org.eigenbase.rel.RelCollationImpl; import org.eigenbase.rel.RelNode; import org.eigenbase.rel.SetOpRel; import org.eigenbase.rel.SingleRel; import org.eigenbase.rel.SortRel; -import org.eigenbase.rel.TableAccessRelBase; -import org.eigenbase.rel.TableFunctionRelBase; -import org.eigenbase.rel.ValuesRelBase; import org.eigenbase.rel.rules.MultiJoinRel; import org.eigenbase.relopt.hep.HepRelVertex; import org.eigenbase.relopt.volcano.RelSubset; +import org.eigenbase.reltype.RelDataType; import org.eigenbase.rex.RexNode; +import org.eigenbase.util.Pair; -public class DerivedTableInjector { +import com.google.common.collect.ImmutableMap; - public static RelNode convertOpTree(RelNode rel, List resultSchema) throws OptiqSemanticException { +public class PlanModifierForASTConv { + + public static RelNode convertOpTree(RelNode rel, List resultSchema) + throws OptiqSemanticException { RelNode newTopNode = rel; if (!(newTopNode instanceof ProjectRelBase) && !(newTopNode instanceof SortRel)) { @@ -54,7 +59,11 @@ public static RelNode convertOpTree(RelNode rel, List resultSchema) } convertOpTree(newTopNode, (RelNode) null); - newTopNode = renameTopLevelSelectInResultSchema(newTopNode, resultSchema); + + Pair topSelparentPair = HiveOptiqUtil.getTopLevelSelect(newTopNode); + fixTopOBSchema(newTopNode, topSelparentPair, resultSchema); + topSelparentPair = HiveOptiqUtil.getTopLevelSelect(newTopNode); + newTopNode = renameTopLevelSelectInResultSchema(newTopNode, topSelparentPair, resultSchema); return newTopNode; } @@ -113,36 +122,68 @@ private static void convertOpTree(RelNode rel, RelNode parent) { } } - private static RelNode renameTopLevelSelectInResultSchema(final RelNode rootRel, - List resultSchema) throws OptiqSemanticException { - RelNode tmpRel = rootRel; - RelNode parentOforiginalProjRel = rootRel; - HiveProjectRel originalProjRel = null; - - while (tmpRel != null) { - if (tmpRel instanceof HiveProjectRel) { - originalProjRel = (HiveProjectRel) tmpRel; - break; + private static void fixTopOBSchema(final RelNode rootRel, + Pair topSelparentPair, List resultSchema) + throws OptiqSemanticException { + if (topSelparentPair.getKey() instanceof SortRel + && HiveOptiqUtil.orderRelNode(topSelparentPair.getKey())) { + HiveSortRel obRel = (HiveSortRel) topSelparentPair.getKey(); + ProjectRelBase obChild = (ProjectRelBase) topSelparentPair.getValue(); + + if (obChild.getRowType().getFieldCount() > resultSchema.size()) { + RelDataType rt = obChild.getRowType(); + Set collationInputRefs = new HashSet(RelCollationImpl.ordinals(obRel + .getCollation())); + ImmutableMap.Builder inputRefToCallMapBldr = ImmutableMap.builder(); + for (int i = resultSchema.size(); i < rt.getFieldCount(); i++) { + if (collationInputRefs.contains(i)) { + inputRefToCallMapBldr.put(i, obChild.getChildExps().get(i)); + } + } + + ImmutableMap inputRefToCallMap = inputRefToCallMapBldr.build(); + if ((obChild.getRowType().getFieldCount() - inputRefToCallMap.size()) == resultSchema + .size()) { + HiveProjectRel replacementProjectRel = HiveProjectRel.create(obChild.getChild(), obChild + .getChildExps().subList(0, resultSchema.size()), obChild.getRowType().getFieldNames() + .subList(0, resultSchema.size())); + obRel.replaceInput(0, replacementProjectRel); + obRel.setInputRefToCallMap(inputRefToCallMap); + } else { + throw new OptiqSemanticException( + "Result Schema didn't match Optiq Optimized Op Tree Schema"); + } } - parentOforiginalProjRel = tmpRel; - tmpRel = tmpRel.getInput(0); } + } + + private static RelNode renameTopLevelSelectInResultSchema(final RelNode rootRel, + Pair topSelparentPair, List resultSchema) + throws OptiqSemanticException { + RelNode parentOforiginalProjRel = topSelparentPair.getKey(); + HiveProjectRel originalProjRel = (HiveProjectRel) topSelparentPair.getValue(); // Assumption: top portion of tree could only be // (limit)?(OB)?(ProjectRelBase).... List rootChildExps = originalProjRel.getChildExps(); if (resultSchema.size() != rootChildExps.size()) { // this is a bug in Hive where for queries like select key,value,value - // convertRowSchemaToResultSetSchema() only returns schema containing key,value - // Underlying issue is much deeper because it seems like RowResolver itself doesnt have + // convertRowSchemaToResultSetSchema() only returns schema containing + // key,value + // Underlying issue is much deeper because it seems like RowResolver + // itself doesnt have // those mappings. see limit_pushdown.q & limit_pushdown_negative.q // Till Hive issue is fixed, disable CBO for such queries. throw new OptiqSemanticException("Result Schema didn't match Optiq Optimized Op Tree Schema"); } List newSelAliases = new ArrayList(); + String colAlias; for (int i = 0; i < rootChildExps.size(); i++) { - newSelAliases.add(resultSchema.get(i).getName()); + colAlias = resultSchema.get(i).getName(); + if (colAlias.startsWith("_")) + colAlias = colAlias.substring(1); + newSelAliases.add(colAlias); } HiveProjectRel replacementProjectRel = HiveProjectRel.create(originalProjRel.getChild(), @@ -231,9 +272,9 @@ private static boolean validGBParent(RelNode gbNode, RelNode parent) { private static boolean validSortParent(RelNode sortNode, RelNode parent) { boolean validParent = true; - if (parent != null && !(parent instanceof ProjectRelBase)) { + if (parent != null && !(parent instanceof ProjectRelBase) + && !((parent instanceof SortRel) || HiveOptiqUtil.orderRelNode(parent))) validParent = false; - } return validParent; } @@ -242,7 +283,8 @@ private static boolean validSortChild(HiveSortRel sortNode) { boolean validChild = true; RelNode child = sortNode.getChild(); - if (!(child instanceof ProjectRelBase)) { + if (!(HiveOptiqUtil.limitRelNode(sortNode) && HiveOptiqUtil.orderRelNode(child)) + && !(child instanceof ProjectRelBase)) { validChild = false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 9726a45..94eda49 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -37,6 +37,7 @@ import java.util.TreeSet; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -12190,12 +12191,12 @@ private boolean canHandleQuery(QB qbToChk, boolean topLevelQB) { } private class OptiqBasedPlanner implements Frameworks.PlannerAction { - RelOptCluster cluster; - RelOptSchema relOptSchema; - SemanticException semanticException; - Map partitionCache; - AtomicInteger noColsMissingStats = new AtomicInteger(0); - List topLevelFieldSchema; + private RelOptCluster cluster; + private RelOptSchema relOptSchema; + private SemanticException semanticException; + private Map partitionCache; + private AtomicInteger noColsMissingStats = new AtomicInteger(0); + List topLevelFieldSchema; // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or // just last one. @@ -12240,7 +12241,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu this.relOptSchema = relOptSchema; try { - optiqGenPlan = genLogicalPlan(qb); + optiqGenPlan = genLogicalPlan(qb, true); topLevelFieldSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(optiqGenPlan), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); } catch (SemanticException e) { @@ -12861,7 +12862,7 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, Phase1Ctx ctx_1 = initPhase1Ctx(); doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1); getMetaData(qbSQ); - RelNode subQueryRelNode = genLogicalPlan(qbSQ); + RelNode subQueryRelNode = genLogicalPlan(qbSQ, false); aliasToRel.put(subQuery.getAlias(), subQueryRelNode); RowResolver sqRR = relToHiveRR.get(subQueryRelNode); @@ -12889,7 +12890,7 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, ctx_1 = initPhase1Ctx(); doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1); getMetaData(qbSQ_nic); - RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic); + RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic, false); aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, // set explicitly to inner until we figure out SemiJoin use @@ -13297,8 +13298,22 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException return gbRel; } - private RelNode genOBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { - RelNode relToRet = null; + /** + * Generate OB RelNode and input Select RelNode that should be used to + * introduce top constraining Project. If Input select RelNode is not + * present then don't introduce top constraining select. + * + * @param qb + * @param srcRel + * @param outermostOB + * @return Pair Key- OB RelNode, Value - Input Select for + * top constraining Select + * @throws SemanticException + */ + private Pair genOBLogicalPlan(QB qb, RelNode srcRel, boolean outermostOB) + throws SemanticException { + RelNode sortRel = null; + RelNode originalOBChild = null; QBParseInfo qbp = getQBParseInfo(qb); String dest = qbp.getClauseNames().iterator().next(); @@ -13365,7 +13380,8 @@ private RelNode genOBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException fieldCollations.add(new RelFieldCollation(fieldIndex, order)); } - // 3. Add Child Project Rel if needed + // 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel + // for top constraining Sel RelNode obInputRel = srcRel; if (!newVCLst.isEmpty()) { List originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), @@ -13375,53 +13391,46 @@ public RexNode apply(RelDataTypeField input) { return new RexInputRef(input.getIndex(), input.getType()); } }); + RowResolver obSyntheticProjectRR = new RowResolver(); + RowResolver.add(obSyntheticProjectRR, inputRR, 0); + int vcolPos = inputRR.getRowSchema().getSignature().size(); + for (Pair astTypePair : vcASTTypePairs) { + obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo( + getColumnInternalName(vcolPos), astTypePair.getValue(), null, false)); + vcolPos++; + } + obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst), + obSyntheticProjectRR, srcRel); + + if (outermostOB) { + RowResolver.add(outputRR, inputRR, 0); - obInputRel = HiveProjectRel.create(srcRel, CompositeList.of(originalInputRefs, newVCLst), - null); + } else { + RowResolver.add(outputRR, obSyntheticProjectRR, 0); + originalOBChild = srcRel; + } + } else { + RowResolver.add(outputRR, inputRR, 0); } // 4. Construct SortRel RelTraitSet traitSet = cluster.traitSetOf(HiveRel.CONVENTION); RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations)); - // TODO: Is it better to introduce a - // project on top to restrict VC from showing up in sortRel type - RelNode sortRel = new HiveSortRel(cluster, traitSet, obInputRel, canonizedCollation, - null, null); - - // 5. Construct OB Parent Rel If needed - // Construct a parent Project if OB has virtual columns(vc) otherwise - // vc would show up in the result - // TODO: If OB is part of sub query & Parent Query select is not of the - // type "select */.*..." then parent project is not needed - relToRet = sortRel; - if (!newVCLst.isEmpty()) { - List obParentRelProjs = Lists.transform(srcRel.getRowType().getFieldList(), - new Function() { - @Override - public RexNode apply(RelDataTypeField input) { - return new RexInputRef(input.getIndex(), input.getType()); - } - }); + sortRel = new HiveSortRel(cluster, traitSet, obInputRel, canonizedCollation, null, null); - relToRet = HiveProjectRel.create(sortRel, obParentRelProjs, null); - } - - // 6. Construct output RR - RowResolver.add(outputRR, inputRR, 0); - - // 7. Update the maps + // 5. Update the maps // NOTE: Output RR for SortRel is considered same as its input; we may // end up not using VC that is present in sort rel. Also note that // rowtype of sortrel is the type of it child; if child happens to be // synthetic project that we introduced then that projectrel would // contain the vc. ImmutableMap hiveColNameOptiqPosMap = buildHiveToOptiqColumnMap(outputRR, - relToRet); - relToHiveRR.put(relToRet, outputRR); - relToHiveColNameOptiqPosMap.put(relToRet, hiveColNameOptiqPosMap); + sortRel); + relToHiveRR.put(sortRel, outputRR); + relToHiveColNameOptiqPosMap.put(sortRel, hiveColNameOptiqPosMap); } - return relToRet; + return (new Pair(sortRel, originalOBChild)); } private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { @@ -13648,6 +13657,53 @@ private RelNode genSelectForWindowing(QB qb, RelNode srcRel) throws SemanticExce private RelNode genSelectRelNode(List optiqColLst, RowResolver out_rwsch, RelNode srcRel) throws OptiqSemanticException { // 1. Build Column Names + Set colNamesSet = new HashSet(); + List cInfoLst = out_rwsch.getRowSchema().getSignature(); + ArrayList columnNames = new ArrayList(); + String[] qualifiedColNames; + String tmpColAlias; + for (int i = 0; i < optiqColLst.size(); i++) { + ColumnInfo cInfo = cInfoLst.get(i); + qualifiedColNames = out_rwsch.reverseLookup(cInfo.getInternalName()); + /* + if (qualifiedColNames[0] != null && !qualifiedColNames[0].isEmpty()) + tmpColAlias = qualifiedColNames[0] + "." + qualifiedColNames[1]; + else + */ + tmpColAlias = qualifiedColNames[1]; + + // Prepend column names with '_o_' if it starts with '_c' + /* + * Hive treats names that start with '_c' as internalNames; so change + * the names so we don't run into this issue when converting back to + * Hive AST. + */ + if (tmpColAlias.startsWith("_c")) + tmpColAlias = "_o_" + tmpColAlias; + int suffix = 1; + while (colNamesSet.contains(tmpColAlias)) { + tmpColAlias = qualifiedColNames[1] + suffix; + suffix++; + } + + colNamesSet.add(tmpColAlias); + columnNames.add(tmpColAlias); + } + + // 3 Build Optiq Rel Node for project using converted projections & col + // names + HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, columnNames); + + // 4. Keep track of colname-to-posmap && RR for new select + this.relToHiveColNameOptiqPosMap.put(selRel, buildHiveToOptiqColumnMap(out_rwsch, selRel)); + this.relToHiveRR.put(selRel, out_rwsch); + + return selRel; + } + + private RelNode genSelectRelNode(List optiqColLst, RowResolver out_rwsch, + RelNode srcRel, boolean removethismethod) throws OptiqSemanticException { + // 1. Build Column Names // TODO: Should this be external names ArrayList columnNames = new ArrayList(); for (int i = 0; i < optiqColLst.size(); i++) { @@ -13857,7 +13913,7 @@ private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel) throws SemanticExcep private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { - return genLogicalPlan(qbexpr.getQB()); + return genLogicalPlan(qbexpr.getQB(), false); } if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) { RelNode qbexpr1Ops = genLogicalPlan(qbexpr.getQBExpr1()); @@ -13869,7 +13925,7 @@ private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { return null; } - private RelNode genLogicalPlan(QB qb) throws SemanticException { + private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException { RelNode srcRel = null; RelNode filterRel = null; RelNode gbRel = null; @@ -13906,15 +13962,16 @@ private RelNode genLogicalPlan(QB qb) throws SemanticException { } if (aliasToRel.isEmpty()) { - //// This may happen for queries like select 1; (no source table) + // // This may happen for queries like select 1; (no source table) // We can do following which is same, as what Hive does. // With this, we will be able to generate Optiq plan. - // qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable()); - // RelNode op = genTableLogicalPlan(DUMMY_TABLE, qb); - // qb.addAlias(DUMMY_TABLE); - // qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE); - // aliasToRel.put(DUMMY_TABLE, op); - // However, Hive trips later while trying to get Metadata for this dummy table + // qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable()); + // RelNode op = genTableLogicalPlan(DUMMY_TABLE, qb); + // qb.addAlias(DUMMY_TABLE); + // qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE); + // aliasToRel.put(DUMMY_TABLE, op); + // However, Hive trips later while trying to get Metadata for this dummy + // table // So, for now lets just disable this. Anyway there is nothing much to // optimize in such cases. throw new OptiqSemanticException("Unsupported"); @@ -13945,14 +14002,44 @@ private RelNode genLogicalPlan(QB qb) throws SemanticException { srcRel = (selectRel == null) ? srcRel : selectRel; // 6. Build Rel for OB Clause - obRel = genOBLogicalPlan(qb, srcRel); + Pair obTopProjPair = genOBLogicalPlan(qb, srcRel, outerMostQB); + obRel = obTopProjPair.getKey(); + RelNode topConstrainingProjArgsRel = obTopProjPair.getValue(); srcRel = (obRel == null) ? srcRel : obRel; // 7. Build Rel for Limit Clause limitRel = genLimitLogicalPlan(qb, srcRel); srcRel = (limitRel == null) ? srcRel : limitRel; - // 8. Incase this QB corresponds to subquery then modify its RR to point + // 8. Introduce top constraining select if needed. + // NOTES: + // 1. Optiq can not take an expr in OB; hence it needs to be added as VC + // in the input select; In such cases we need to introduce a select on top + // to ensure VC is not visible beyond Limit, OB. + // 2. Hive can not preserve order across select. In subqueries OB is used + // to get a deterministic set of tuples from following limit. Hence we + // introduce the constraining select above Limit (if present) instead of + // OB. + // 3. The top level OB will not introduce constraining select due to Hive + // limitation(#2) stated above. The RR for OB will not include VC. Thus + // Result Schema will not include exprs used by top OB. During AST Conv, + // in the PlanModifierForASTConv we would modify the top level OB to + // migrate exprs from input sel to SortRel (Note that Optiq doesn't + // support this; but since we are done with Optiq at this point its OK). + if (topConstrainingProjArgsRel != null) { + List originalInputRefs = Lists.transform(topConstrainingProjArgsRel.getRowType() + .getFieldList(), new Function() { + @Override + public RexNode apply(RelDataTypeField input) { + return new RexInputRef(input.getIndex(), input.getType()); + } + }); + RowResolver topConstrainingProjRR = new RowResolver(); + RowResolver.add(topConstrainingProjRR, this.relToHiveRR.get(topConstrainingProjArgsRel), 0); + srcRel = genSelectRelNode(originalInputRefs, topConstrainingProjRR, srcRel); + } + + // 9. Incase this QB corresponds to subquery then modify its RR to point // to subquery alias // TODO: cleanup this if (qb.getParseInfo().getAlias() != null) {