diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java index 6bde1d1..e8069ee 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/TraitsUtil.java @@ -47,4 +47,8 @@ public static RelTraitSet getTableScanTraitSet(RelOptCluster cluster, RelTraitSe public static RelTraitSet getJoinTraitSet(RelOptCluster cluster, RelTraitSet traitSet) { return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY); } + + public static RelTraitSet getUnionTraitSet(RelOptCluster cluster, RelTraitSet traitSet) { + return cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java index 03f58d0..8cbf2f1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveProjectRel.java @@ -1,5 +1,6 @@ package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; +import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -18,8 +19,12 @@ import org.eigenbase.relopt.RelOptRule; import org.eigenbase.relopt.RelTraitSet; import org.eigenbase.reltype.RelDataType; +import org.eigenbase.reltype.RelDataTypeField; +import org.eigenbase.rex.RexBuilder; import org.eigenbase.rex.RexNode; import org.eigenbase.rex.RexUtil; +import org.eigenbase.util.mapping.Mapping; +import org.eigenbase.util.mapping.MappingType; public class HiveProjectRel extends ProjectRelBase implements HiveRel { @@ -72,6 +77,57 @@ public static HiveProjectRel create(RelOptCluster cluster, RelNode child, List + * A partial mapping is weaker than a permutation: every target has one + * source, but a source may have 0, 1 or more than one targets. Usually the + * result will have fewer fields than the source, unless some source fields + * are projected multiple times. + * + *

+ * This method could optimize the result as {@link #permute} does, but does + * not at present. + * + * @param rel + * Relational expression + * @param mapping + * Mapping from source fields to target fields. The mapping type must + * obey the constraints {@link MappingType#isMandatorySource()} and + * {@link MappingType#isSingleSource()}, as does + * {@link MappingType#INVERSE_FUNCTION}. + * @param fieldNames + * Field names; if null, or if a particular entry is null, the name + * of the permuted field is used + * @return relational expression which projects a subset of the input fields + */ + public static RelNode projectMapping(RelNode rel, Mapping mapping, List fieldNames) { + assert mapping.getMappingType().isSingleSource(); + assert mapping.getMappingType().isMandatorySource(); + + if (mapping.isIdentity()) { + return rel; + } + + final List outputNameList = new ArrayList(); + final List outputProjList = new ArrayList(); + final List fields = rel.getRowType().getFieldList(); + final RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); + + for (int i = 0; i < mapping.getTargetCount(); i++) { + int source = mapping.getSource(i); + final RelDataTypeField sourceField = fields.get(source); + outputNameList + .add(((fieldNames == null) || (fieldNames.size() <= i) || (fieldNames.get(i) == null)) ? sourceField + .getName() : fieldNames.get(i)); + outputProjList.add(rexBuilder.makeInputRef(rel, source)); + } + + return create(rel, (List) outputProjList, outputNameList); + } + public ProjectRelBase copy(RelTraitSet traitSet, RelNode input, List exps, RelDataType rowType) { assert traitSet.containsIfApplicable(HiveRel.CONVENTION); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java new file mode 100644 index 0000000..b81f3c8 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveUnionRel.java @@ -0,0 +1,25 @@ +package org.apache.hadoop.hive.ql.optimizer.optiq.reloperators; + +import java.util.List; + +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel.Implementor; +import org.eigenbase.rel.RelNode; +import org.eigenbase.rel.SetOpRel; +import org.eigenbase.rel.UnionRelBase; +import org.eigenbase.relopt.RelOptCluster; +import org.eigenbase.relopt.RelTraitSet; + +public class HiveUnionRel extends UnionRelBase { + + public HiveUnionRel(RelOptCluster cluster, RelTraitSet traits, List inputs) { + super(cluster, traits, inputs, true); + } + + @Override + public SetOpRel copy(RelTraitSet traitSet, List inputs, boolean all) { + return new HiveUnionRel(this.getCluster(), traitSet, inputs); + } + + public void implement(Implementor implementor) { + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java index 6cdae7f..3cf50ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java @@ -114,7 +114,7 @@ protected void perform(RelOptRuleCall call, FilterRelBase filter, boolean filterPushed = false; final Holder joinTypeHolder = Holder.of(join.getJoinType()); if (RelOptUtil.classifyFilters(join, aboveFilters, - join.getJoinType(), !join.getJoinType().generatesNullsOnLeft(), !join.getJoinType() + join.getJoinType(), true, 
!join.getJoinType().generatesNullsOnLeft(), !join.getJoinType() .generatesNullsOnRight(), joinFilters, leftFilters, rightFilters, joinTypeHolder, false)) { filterPushed = true; } @@ -159,7 +159,7 @@ protected void perform(RelOptRuleCall call, FilterRelBase filter, // Try to push down filters in ON clause. A ON clause filter can only be // pushed down if it does not affect the non-matching set, i.e. it is // not on the side which is preserved. - if (RelOptUtil.classifyFilters(join, joinFilters, null, !join + if (RelOptUtil.classifyFilters(join, joinFilters, null, false, !join .getJoinType().generatesNullsOnRight(), !join.getJoinType() .generatesNullsOnLeft(), joinFilters, leftFilters, rightFilters, joinTypeHolder, false)) { filterPushed = true; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java index d136814..349255b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java @@ -18,6 +18,7 @@ import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel; import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel; import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveUnionRel; import org.eigenbase.rel.AggregateCall; import org.eigenbase.rel.AggregateRel; import org.eigenbase.rel.CalcRel; @@ -556,7 +557,7 @@ public TrimResult trimFields(HiveJoinRel join, BitSet fieldsUsed, * Variant of {@link #trimFields(RelNode, BitSet, Set)} for {@link SetOpRel} * (including UNION and UNION ALL). */ - public TrimResult trimFields(SetOpRel setOp, BitSet fieldsUsed, + public TrimResult trimFields(HiveUnionRel setOp, BitSet fieldsUsed, Set extraFields) { final RelDataType rowType = setOp.getRowType(); final int fieldCount = rowType.getFieldCount(); @@ -594,7 +595,7 @@ public TrimResult trimFields(SetOpRel setOp, BitSet fieldsUsed, Mapping remaining = Mappings.divide(mapping, inputMapping); // Create a projection; does nothing if remaining is identity. 
- newInput = CalcRel.projectMapping(newInput, remaining, null); + newInput = HiveProjectRel.projectMapping(newInput, remaining, null); if (input != newInput) { ++changeCount; @@ -608,7 +609,7 @@ public TrimResult trimFields(SetOpRel setOp, BitSet fieldsUsed, return new TrimResult(setOp, mapping); } - RelNode newSetOp = setOp.copy(setOp.getTraitSet(), newInputs); + RelNode newSetOp = setOp.copy(setOp.getTraitSet(), newInputs, true); return new TrimResult(newSetOp, mapping); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java index d9db8c6..821e403 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java @@ -24,6 +24,7 @@ import org.eigenbase.rel.RelVisitor; import org.eigenbase.rel.SortRel; import org.eigenbase.rel.TableAccessRelBase; +import org.eigenbase.rel.UnionRelBase; import org.eigenbase.reltype.RelDataTypeField; import org.eigenbase.rex.RexCall; import org.eigenbase.rex.RexFieldCollation; @@ -191,6 +192,20 @@ private QueryBlockInfo convertSource(RelNode r) { ast = ASTBuilder.join(left.ast, right.ast, join.getJoinType(), cond, semiJoin); if (semiJoin) s = left.schema; + } else if (r instanceof UnionRelBase) { + RelNode leftInput = ((UnionRelBase) r).getInput(0); + RelNode rightInput = ((UnionRelBase) r).getInput(1); + + ASTConverter leftConv = new ASTConverter(leftInput); + ASTConverter rightConv = new ASTConverter(rightInput); + ASTNode leftAST = leftConv.convert((SortRel) null); + ASTNode rightAST = rightConv.convert((SortRel) null); + + ASTNode unionAST = getUnionAllAST(leftAST, rightAST); + + String sqAlias = ASTConverter.nextAlias(); + ast = ASTBuilder.subQuery(unionAST, sqAlias); + s = new Schema((UnionRelBase) r, sqAlias); } else { ASTConverter src = new ASTConverter(r); ASTNode srcAST = src.convert(order); @@ -231,6 +246,8 @@ public void visit(RelNode node, int ordinal, RelNode parent) { handle((ProjectRelBase) node); } else if (node instanceof JoinRelBase) { ASTConverter.this.from = node; + } else if (node instanceof UnionRelBase) { + ASTConverter.this.from = node; } else if (node instanceof AggregateRelBase) { ASTConverter.this.groupBy = (AggregateRelBase) node; } else if (node instanceof SortRel) { @@ -271,16 +288,16 @@ public ASTNode visitLiteral(RexLiteral literal) { private ASTNode getPSpecAST(RexWindow window) { ASTNode pSpecAst = null; - + ASTNode dByAst = null; if (window.partitionKeys != null && !window.partitionKeys.isEmpty()) { dByAst = ASTBuilder.createAST(HiveParser.TOK_DISTRIBUTEBY, "TOK_DISTRIBUTEBY"); for (RexNode pk : window.partitionKeys) { ASTNode astCol = pk.accept(this); dByAst.addChild(astCol); - } + } } - + ASTNode oByAst = null; if (window.orderKeys != null && !window.orderKeys.isEmpty()) { oByAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); @@ -288,7 +305,7 @@ private ASTNode getPSpecAST(RexWindow window) { ASTNode astNode = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? 
ASTBuilder .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); - ASTNode astCol = ok.left.accept(this); + ASTNode astCol = ok.left.accept(this); astNode.addChild(astCol); oByAst.addChild(astNode); } @@ -304,15 +321,15 @@ private ASTNode getPSpecAST(RexWindow window) { return pSpecAst; } - + private ASTNode getWindowBound(RexWindowBound wb) { ASTNode wbAST = null; - + if (wb.isCurrentRow()) { - wbAST = ASTBuilder.createAST(HiveParser.KW_CURRENT, "CURRENT"); - } else { + wbAST = ASTBuilder.createAST(HiveParser.KW_CURRENT, "CURRENT"); + } else { if (wb.isPreceding()) - wbAST = ASTBuilder.createAST(HiveParser.KW_PRECEDING, "PRECEDING"); + wbAST = ASTBuilder.createAST(HiveParser.KW_PRECEDING, "PRECEDING"); else wbAST = ASTBuilder.createAST(HiveParser.KW_FOLLOWING, "FOLLOWING"); if (wb.isUnbounded()) { @@ -322,7 +339,7 @@ private ASTNode getWindowBound(RexWindowBound wb) { wbAST.addChild(offset); } } - + return wbAST; } @@ -330,29 +347,29 @@ private ASTNode getWindowRangeAST(RexWindow window) { ASTNode wRangeAst = null; ASTNode startAST = null; - RexWindowBound ub = (RexWindowBound)window.getUpperBound(); + RexWindowBound ub = (RexWindowBound) window.getUpperBound(); if (ub != null) { startAST = getWindowBound(ub); } - + ASTNode endAST = null; - RexWindowBound lb = (RexWindowBound)window.getLowerBound(); + RexWindowBound lb = (RexWindowBound) window.getLowerBound(); if (lb != null) { endAST = getWindowBound(lb); } if (startAST != null || endAST != null) { - //NOTE: in Hive AST Rows->Range(Physical) & Range -> Values (logical) + // NOTE: in Hive AST Rows->Range(Physical) & Range -> Values (logical) if (window.isRows()) - wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWRANGE, "TOK_WINDOWRANGE"); + wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWRANGE, "TOK_WINDOWRANGE"); else wRangeAst = ASTBuilder.createAST(HiveParser.TOK_WINDOWVALUES, "TOK_WINDOWVALUES"); if (startAST != null) - wRangeAst.addChild(startAST); + wRangeAst.addChild(startAST); if (endAST != null) wRangeAst.addChild(endAST); } - + return wRangeAst; } @@ -361,14 +378,14 @@ public ASTNode visitOver(RexOver over) { if (!deep) { return null; } - + // 1. Translate the UDAF final ASTNode wUDAFAst = visitCall(over); - + // 2. Add TOK_WINDOW as child of UDAF ASTNode wSpec = ASTBuilder.createAST(HiveParser.TOK_WINDOWSPEC, "TOK_WINDOWSPEC"); wUDAFAst.addChild(wSpec); - + // 3. 
Add Part Spec & Range Spec as child of TOK_WINDOW final RexWindow window = over.getWindow(); final ASTNode wPSpecAst = getPSpecAST(window); @@ -378,7 +395,6 @@ public ASTNode visitOver(RexOver over) { if (wRangeAst != null) wSpec.addChild(wRangeAst); - return wUDAFAst; } @@ -442,6 +458,12 @@ public QueryBlockInfo(Schema schema, ASTNode ast) { } } + Schema(UnionRelBase unionRel, String alias) { + for (RelDataTypeField field : unionRel.getRowType().getFieldList()) { + add(new ColumnInfo(alias, field.getName())); + } + } + @SuppressWarnings("unchecked") Schema(Schema left, Schema right) { for (ColumnInfo cI : Iterables.concat(left, right)) { @@ -541,6 +563,14 @@ public ASTNode getAST() { } } + public ASTNode getUnionAllAST(ASTNode leftAST, ASTNode rightAST) { + + ASTNode unionTokAST = ASTBuilder.construct(HiveParser.TOK_UNION, "TOK_UNION").add(leftAST) + .add(rightAST).node(); + + return unionTokAST; + } + public static boolean isFlat(RexCall call) { boolean flat = false; if (call.operands != null && call.operands.size() > 2) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/DerivedTableInjector.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/DerivedTableInjector.java index 2af707c..98077fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/DerivedTableInjector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/DerivedTableInjector.java @@ -18,6 +18,7 @@ import org.eigenbase.rel.SingleRel; import org.eigenbase.rel.TableAccessRelBase; import org.eigenbase.rel.TableFunctionRelBase; +import org.eigenbase.rel.UnionRelBase; import org.eigenbase.rel.ValuesRelBase; import org.eigenbase.rel.rules.MultiJoinRel; import org.eigenbase.relopt.hep.HepRelVertex; @@ -36,29 +37,46 @@ public static RelNode convertOpTree(RelNode rel, List resultSchema) // OB, Limit in sub query. // RelNode newTopSelect = introduceTopLevelSelectInResultSchema(rel, // resultSchema); - RelNode newTopSelect = rel; - convertOpTree(newTopSelect, (RelNode) null); - return newTopSelect; + RelNode newTopNode = rel; + + // NOTE: Hive requires Union to buried in Project (TOK_QUERY, + // TOK_SUBQUERY, TOK_UNION) + if (newTopNode instanceof UnionRelBase) { + newTopNode = introduceDerivedTable(newTopNode); + } + + convertOpTree(newTopNode, (RelNode) null); + + return newTopNode; } private static void convertOpTree(RelNode rel, RelNode parent) { if (rel instanceof EmptyRel) { - // TODO: replace with null scan + throw new RuntimeException("Found Empty Rel"); } else if (rel instanceof HepRelVertex) { - // TODO: is this relevant? 
+ throw new RuntimeException("Found HepRelVertex"); } else if (rel instanceof JoinRelBase) { if (!validJoinParent(rel, parent)) { introduceDerivedTable(rel, parent); } } else if (rel instanceof MultiJoinRel) { - + throw new RuntimeException("Found MultiJoinRel"); } else if (rel instanceof OneRowRelBase) { - + throw new RuntimeException("Found OneRowRelBase"); } else if (rel instanceof RelSubset) { - + throw new RuntimeException("Found RelSubset"); } else if (rel instanceof SetOpRel) { + // TODO: Handle more than 2 inputs for setop + if (!validSetopParent(rel, parent)) + introduceDerivedTable(rel, parent); + SetOpRel setopRel = (SetOpRel) rel; + for (RelNode inputRel : setopRel.getInputs()) { + if (!validSetopChild(inputRel)) { + introduceDerivedTable(inputRel, setopRel); + } + } } else if (rel instanceof SingleRel) { if (rel instanceof FilterRelBase) { if (!validFilterParent(rel, parent)) { @@ -104,7 +122,7 @@ private static HiveProjectRel introduceTopLevelSelectInResultSchema(final RelNod curNode = curNode.getInput(0); } - //Assumption: tree could only be (limit)?(OB)?(ProjectRelBase).... + // Assumption: tree could only be (limit)?(OB)?(ProjectRelBase).... List rootChildExps = rootProjRel.getChildExps(); if (resultSchema.size() != rootChildExps.size()) { throw new RuntimeException("Result Schema didn't match Optiq Optimized Op Tree Schema"); @@ -120,6 +138,20 @@ private static HiveProjectRel introduceTopLevelSelectInResultSchema(final RelNod return HiveProjectRel.create(rootRel, newSelExps, newSelAliases); } + private static RelNode introduceDerivedTable(final RelNode rel) { + List projectList = Lists.transform(rel.getRowType().getFieldList(), + new Function() { + public RexNode apply(RelDataTypeField field) { + return rel.getCluster().getRexBuilder().makeInputRef(field.getType(), field.getIndex()); + } + }); + + HiveProjectRel select = HiveProjectRel.create(rel.getCluster(), rel, projectList, + rel.getRowType(), rel.getCollationList()); + + return select; + } + private static void introduceDerivedTable(final RelNode rel, RelNode parent) { int i = 0; int pos = -1; @@ -137,17 +169,9 @@ private static void introduceDerivedTable(final RelNode rel, RelNode parent) { throw new RuntimeException("Couldn't find child node in parent's inputs"); } - List projectList = Lists.transform(rel.getRowType().getFieldList(), - new Function() { - public RexNode apply(RelDataTypeField field) { - return rel.getCluster().getRexBuilder().makeInputRef(field.getType(), field.getIndex()); - } - }); + RelNode select = introduceDerivedTable(rel); - HiveProjectRel select = HiveProjectRel.create(rel.getCluster(), rel, projectList, - rel.getRowType(), rel.getCollationList()); parent.replaceInput(pos, select); - } private static boolean validJoinParent(RelNode joinNode, RelNode parent) { @@ -210,4 +234,24 @@ private static boolean validSortChild(HiveSortRel sortNode) { return validChild; } + + private static boolean validSetopParent(RelNode setop, RelNode parent) { + boolean validChild = true; + + if (parent != null && !(parent instanceof ProjectRelBase)) { + validChild = false; + } + + return validChild; + } + + private static boolean validSetopChild(RelNode setopChild) { + boolean validChild = true; + + if (!(setopChild instanceof ProjectRelBase)) { + validChild = false; + } + + return validChild; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 739e1b7..75e3696 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -112,6 +112,7 @@ import org.apache.hadoop.hive.ql.optimizer.optiq.HiveDefaultRelMetadataProvider; import org.apache.hadoop.hive.ql.optimizer.optiq.Pair; import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveVolcanoPlanner; import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel; import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel; @@ -120,13 +121,10 @@ import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel; import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel; import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; -import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HiveMergeProjectRule; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveUnionRel; import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePartitionPrunerRule; -import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePullUpProjectsAboveJoinRule; import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushFilterPastJoinRule; -import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushJoinThroughJoinRule; import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HiveRelFieldTrimmer; -import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HiveSwapJoinRule; import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ASTConverter; import org.apache.hadoop.hive.ql.optimizer.optiq.translator.RexNodeConverter; import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter; @@ -228,7 +226,6 @@ import org.eigenbase.rel.JoinRelType; import org.eigenbase.rel.RelCollation; import org.eigenbase.rel.RelCollationImpl; -import org.eigenbase.rel.RelFactories; import org.eigenbase.rel.RelFieldCollation; import org.eigenbase.rel.RelNode; import org.eigenbase.rel.metadata.CachingRelMetadataProvider; @@ -236,8 +233,6 @@ import org.eigenbase.rel.metadata.RelMetadataProvider; import org.eigenbase.rel.rules.ConvertMultiJoinRule; import org.eigenbase.rel.rules.LoptOptimizeJoinRule; -import org.eigenbase.rel.rules.OptimizeBushyJoinRule; -import org.eigenbase.rel.rules.PushFilterPastJoinRule; import org.eigenbase.relopt.RelOptCluster; import org.eigenbase.relopt.RelOptPlanner; import org.eigenbase.relopt.RelOptQuery; @@ -272,7 +267,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Iterables; import com.google.common.collect.Lists; /** @@ -11927,9 +11921,137 @@ private RelNode hepPlan(RelNode basePlan, return planner.findBestExp(); } - private RelNode genUnionLogicalPlan(String unionalias, String leftalias, - RelNode leftOp, String rightalias, RelNode rightOp) { - return null; + @SuppressWarnings("nls") + private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel, + String rightalias, RelNode rightRel) throws SemanticException { + HiveUnionRel unionRel = null; + + // 1. 
Get Row Resolvers, Column map for original left and right input of + // Union Rel + RowResolver leftRR = this.m_relToHiveRR.get(leftRel); + RowResolver rightRR = this.m_relToHiveRR.get(rightRel); + HashMap leftmap = leftRR.getFieldMap(leftalias); + HashMap rightmap = rightRR.getFieldMap(rightalias); + + // 2. Validate that Union is feasible according to Hive (by using type + // info from RR) + if (leftmap.size() != rightmap.size()) { + throw new SemanticException("Schema of both sides of union should match."); + } + + ASTNode tabref = qb.getAliases().isEmpty() ? null : qb.getParseInfo().getSrcForAlias( + qb.getAliases().get(0)); + for (Map.Entry lEntry : leftmap.entrySet()) { + String field = lEntry.getKey(); + ColumnInfo lInfo = lEntry.getValue(); + ColumnInfo rInfo = rightmap.get(field); + if (rInfo == null) { + throw new SemanticException(generateErrorMessage(tabref, + "Schema of both sides of union should match. " + rightalias + + " does not have the field " + field)); + } + if (lInfo == null) { + throw new SemanticException(generateErrorMessage(tabref, + "Schema of both sides of union should match. " + leftalias + + " does not have the field " + field)); + } + if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { + throw new SemanticException(generateErrorMessage(tabref, + "Schema of both sides of union should match: field " + field + ":" + + " appears on the left side of the UNION at column position: " + + getPositionFromInternalName(lInfo.getInternalName()) + + ", and on the right side of the UNION at column position: " + + getPositionFromInternalName(rInfo.getInternalName()) + + ". Column positions should match for a UNION")); + } + // try widening coversion, otherwise fail union + TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), + rInfo.getType()); + if (commonTypeInfo == null) { + throw new SemanticException(generateErrorMessage(tabref, + "Schema of both sides of union should match: Column " + field + " is of type " + + lInfo.getType().getTypeName() + " on first table and type " + + rInfo.getType().getTypeName() + " on second table")); + } + } + + // 3. construct Union Output RR using original left & right Input + RowResolver unionoutRR = new RowResolver(); + for (Map.Entry lEntry : leftmap.entrySet()) { + String field = lEntry.getKey(); + ColumnInfo lInfo = lEntry.getValue(); + ColumnInfo rInfo = rightmap.get(field); + ColumnInfo unionColInfo = new ColumnInfo(lInfo); + unionColInfo.setTabAlias(unionalias); + unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), + rInfo.getType())); + unionoutRR.put(unionalias, field, unionColInfo); + } + + // 4. 
Determine which columns requires cast on left/right input (Optiq + // requires exact types on both sides of union) + boolean leftNeedsTypeCast = false; + boolean rightNeedsTypeCast = false; + List leftProjs = new ArrayList(); + List rightProjs = new ArrayList(); + List leftRowDT = leftRel.getRowType().getFieldList(); + List rightRowDT = rightRel.getRowType().getFieldList(); + + RelDataType leftFieldDT; + RelDataType rightFieldDT; + RelDataType unionFieldDT; + List tmpDTLst = new ArrayList(); + for (int i = 0; i < leftRowDT.size(); i++) { + leftFieldDT = leftRowDT.get(i).getType(); + rightFieldDT = rightRowDT.get(i).getType(); + if (!leftFieldDT.equals(rightFieldDT)) { + tmpDTLst.clear(); + tmpDTLst.add(leftFieldDT); + tmpDTLst.add(rightFieldDT); + unionFieldDT = m_cluster.getTypeFactory().leastRestrictive(tmpDTLst); + + if (!unionFieldDT.equals(leftFieldDT)) + leftNeedsTypeCast = true; + leftProjs.add(m_cluster.getRexBuilder().ensureType(unionFieldDT, + m_cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); + + if (!unionFieldDT.equals(rightFieldDT)) + rightNeedsTypeCast = true; + rightProjs.add(m_cluster.getRexBuilder().ensureType(unionFieldDT, + m_cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); + } else { + leftProjs.add(m_cluster.getRexBuilder().ensureType(leftFieldDT, + m_cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); + rightProjs.add(m_cluster.getRexBuilder().ensureType(rightFieldDT, + m_cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); + } + } + + // 5. Introduce Project Rel above original left/right inputs if cast is + // needed for type parity + RelNode unionLeftInput = leftRel; + RelNode unionRightInput = rightRel; + if (leftNeedsTypeCast) { + unionLeftInput = HiveProjectRel.create(leftRel, leftProjs, leftRel.getRowType() + .getFieldNames()); + } + if (rightNeedsTypeCast) { + unionRightInput = HiveProjectRel.create(rightRel, rightProjs, rightRel.getRowType() + .getFieldNames()); + } + + // 6. Construct Union Rel + ImmutableList.Builder bldr = new ImmutableList.Builder(); + bldr.add(unionLeftInput); + bldr.add(unionRightInput); + unionRel = new HiveUnionRel(m_cluster, TraitsUtil.getUnionTraitSet(m_cluster, null), + bldr.build()); + + m_relToHiveRR.put(unionRel, unionoutRR); + m_relToHiveColNameOptiqPosMap.put(unionRel, + this.buildHiveToOptiqColumnMap(unionoutRR, unionRel)); + + return unionRel; } private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, diff --git a/ql/src/test/queries/clientpositive/cbo_correctness.q b/ql/src/test/queries/clientpositive/cbo_correctness.q index 8ab893b..447620a 100644 --- a/ql/src/test/queries/clientpositive/cbo_correctness.q +++ b/ql/src/test/queries/clientpositive/cbo_correctness.q @@ -188,7 +188,7 @@ select count(*) from v1 a join v1 b on a.value = b.value; create view v3 as select v1.value val from v1 join t1 on v1.c_boolean = t1.c_boolean; --- view chaining +-- 10. view chaining select count(val) from v3 where val != '1'; with q1 as ( select key from t1 where key = '1') select count(*) from q1; @@ -214,3 +214,9 @@ drop view v1; drop view v2; drop view v3; drop view v4; + +-- 11. 
Union All +select * from t1 union all select * from t2 order by key; +select key from (select key, c_int from (select * from t1 union all select * from t2 where t2.key >=0)r1 union all select key, c_int from t3)r2 where key >=0 order by key; +select r2.key from (select key, c_int from (select key, c_int from t1 union all select key, c_int from t3 )r1 union all select key, c_int from t3)r2 join (select key, c_int from (select * from t1 union all select * from t2 where t2.key >=0)r1 union all select key, c_int from t3)r3 on r2.key=r3.key where r3.key >=0 order by r2.key; + diff --git a/ql/src/test/results/clientpositive/cbo_correctness.q.out b/ql/src/test/results/clientpositive/cbo_correctness.q.out index 3e18388..b40f74c 100644 --- a/ql/src/test/results/clientpositive/cbo_correctness.q.out +++ b/ql/src/test/results/clientpositive/cbo_correctness.q.out @@ -15890,7 +15890,7 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@t1 POSTHOOK: Input: default@v1 POSTHOOK: Output: default@v3 -PREHOOK: query: -- view chaining +PREHOOK: query: -- 10. view chaining select count(val) from v3 where val != '1' PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -15898,7 +15898,7 @@ PREHOOK: Input: default@t1@dt=2014 PREHOOK: Input: default@v1 PREHOOK: Input: default@v3 #### A masked pattern was here #### -POSTHOOK: query: -- view chaining +POSTHOOK: query: -- 10. view chaining select count(val) from v3 where val != '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 @@ -16014,3 +16014,919 @@ POSTHOOK: query: drop view v4 POSTHOOK: type: DROPVIEW POSTHOOK: Input: default@v4 POSTHOOK: Output: default@v4 +PREHOOK: query: -- 11. Union All +select * from t1 union all select * from t2 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@dt=2014 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- 11. 
Union All +select * from t1 union all select * from t2 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@dt=2014 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t2@dt=2014 +#### A masked pattern was here #### + 1 1 1 1.0 true 2014 + 1 1 1 1.0 true 2014 + 1 1 1 1.0 true 2014 + 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 false 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 false 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 +null null NULL NULL NULL 2014 +null null NULL NULL NULL 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 + 1 1 1 1.0 true 2014 + 1 1 1 1.0 true 2014 + 1 1 1 1.0 true 2014 + 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 false 2014 +1 1 1 1.0 false 2014 +null null NULL NULL NULL 2014 +null null NULL NULL NULL 2014 +PREHOOK: query: select key from (select key, c_int from (select * from t1 union all select * from t2 where t2.key >=0)r1 union all select key, c_int from t3)r2 where key >=0 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@dt=2014 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@dt=2014 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select key from (select key, c_int from (select * from t1 union all select * from t2 where t2.key >=0)r1 union all select key, c_int from t3)r2 where key >=0 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@dt=2014 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t2@dt=2014 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +3 +3 +3 +PREHOOK: query: select r2.key from (select key, c_int from (select key, c_int from t1 union all select key, c_int from t3 )r1 union all select key, c_int from t3)r2 join (select key, c_int from (select * from t1 union all select * from t2 where t2.key >=0)r1 union all select key, c_int from t3)r3 on r2.key=r3.key where r3.key >=0 order by r2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@dt=2014 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@dt=2014 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select r2.key from (select key, c_int from (select key, c_int from t1 union all select key, c_int from t3 )r1 union all select key, c_int from t3)r2 join (select key, c_int from (select * from t1 union all select * from t2 where t2.key >=0)r1 union all select key, c_int from t3)r3 on r2.key=r3.key where r3.key >=0 order by r2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@dt=2014 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t2@dt=2014 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 
+ 1 + 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3
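
A note on the type-alignment step in genUnionLogicalPlan (steps 4 and 5 of the SemanticAnalyzer hunk above): Optiq requires both union inputs to carry identical row types, so wherever the two sides disagree the column is widened to the least restrictive common type and the narrower side is cast. The following is a minimal standalone sketch of that pattern, not the committed code: the class and method names (UnionTypeAlignmentSketch, alignUnionInputs) and the leftRel/rightRel/cluster parameters are illustrative placeholders, but the calls it leans on (leastRestrictive, ensureType, makeInputRef, the three-argument HiveProjectRel.create) are the ones the patch itself uses.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
import org.eigenbase.rel.RelNode;
import org.eigenbase.relopt.RelOptCluster;
import org.eigenbase.reltype.RelDataType;
import org.eigenbase.reltype.RelDataTypeField;
import org.eigenbase.rex.RexBuilder;
import org.eigenbase.rex.RexNode;

public class UnionTypeAlignmentSketch {

  // Sketch: widen each column of two union inputs to a common type, adding a
  // casting Project over whichever side needs it.
  public static RelNode[] alignUnionInputs(RelOptCluster cluster, RelNode leftRel,
      RelNode rightRel) {
    RexBuilder rexBuilder = cluster.getRexBuilder();
    List<RelDataTypeField> leftFields = leftRel.getRowType().getFieldList();
    List<RelDataTypeField> rightFields = rightRel.getRowType().getFieldList();
    List<RexNode> leftProjs = new ArrayList<RexNode>();
    List<RexNode> rightProjs = new ArrayList<RexNode>();
    boolean leftNeedsCast = false;
    boolean rightNeedsCast = false;

    for (int i = 0; i < leftFields.size(); i++) {
      RelDataType lt = leftFields.get(i).getType();
      RelDataType rt = rightFields.get(i).getType();
      // Widest type both sides can be cast to; the Hive-level check in step 2
      // of the patch has already rejected unions whose columns have no common type.
      RelDataType common = lt.equals(rt) ? lt
          : cluster.getTypeFactory().leastRestrictive(Arrays.asList(lt, rt));
      leftNeedsCast |= !common.equals(lt);
      rightNeedsCast |= !common.equals(rt);
      // ensureType inserts a cast only when the requested type actually differs.
      leftProjs.add(rexBuilder.ensureType(common, rexBuilder.makeInputRef(lt, i), true));
      rightProjs.add(rexBuilder.ensureType(common, rexBuilder.makeInputRef(rt, i), true));
    }

    RelNode newLeft = leftNeedsCast
        ? HiveProjectRel.create(leftRel, leftProjs, leftRel.getRowType().getFieldNames())
        : leftRel;
    RelNode newRight = rightNeedsCast
        ? HiveProjectRel.create(rightRel, rightProjs, rightRel.getRowType().getFieldNames())
        : rightRel;
    return new RelNode[] { newLeft, newRight };
  }
}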
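
Once the row types agree, the remaining wiring is small, and the pieces introduced in TraitsUtil, HiveUnionRel and DerivedTableInjector fit together roughly as below. This is a condensed sketch, not code from the patch: cluster, leftInput and rightInput are placeholders, and the identity projection mirrors DerivedTableInjector.introduceDerivedTable(RelNode), which exists because Hive expects a union to sit under a Project so the AST converter can emit it as a TOK_SUBQUERY wrapping a TOK_UNION.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveUnionRel;
import org.eigenbase.rel.RelNode;
import org.eigenbase.relopt.RelOptCluster;
import org.eigenbase.reltype.RelDataTypeField;
import org.eigenbase.rex.RexNode;

import com.google.common.collect.ImmutableList;

public class UnionWiringSketch {

  // Sketch: build a HiveUnionRel and the identity Project Hive wants above it.
  public static RelNode buildUnion(RelOptCluster cluster, RelNode leftInput, RelNode rightInput) {
    HiveUnionRel union = new HiveUnionRel(cluster,
        TraitsUtil.getUnionTraitSet(cluster, null),
        ImmutableList.<RelNode>of(leftInput, rightInput));

    // Identity projection over every union output column, as
    // DerivedTableInjector.introduceDerivedTable does.
    List<RexNode> projectList = new ArrayList<RexNode>();
    for (RelDataTypeField field : union.getRowType().getFieldList()) {
      projectList.add(cluster.getRexBuilder().makeInputRef(field.getType(), field.getIndex()));
    }
    return HiveProjectRel.create(cluster, union, projectList,
        union.getRowType(), union.getCollationList());
  }
}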
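
Finally, the projectMapping method copied into HiveProjectRel is easier to grasp with a small usage example, since its contract (every target position has exactly one source; source fields may be dropped or duplicated) is terser in the Javadoc. The snippet below is illustrative only: Mappings.create and Mapping.set are the standard org.eigenbase.util.mapping calls that the field trimmer relies on, not code from this patch, and rel stands for any RelNode in scope with at least three output fields.

import org.eigenbase.rel.RelNode;
import org.eigenbase.util.mapping.Mapping;
import org.eigenbase.util.mapping.MappingType;
import org.eigenbase.util.mapping.Mappings;

// Sketch: keep only fields 2 and 0 of 'rel', in that order, dropping the rest.
Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION,
    rel.getRowType().getFieldCount(), 2);
mapping.set(2, 0); // source field 2 becomes output column 0
mapping.set(0, 1); // source field 0 becomes output column 1
RelNode trimmed = HiveProjectRel.projectMapping(rel, mapping, null);
// 'trimmed' is a HiveProjectRel unless the mapping is the identity,
// in which case the input is returned unchanged.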