/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;

import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelReferentialConstraint;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.Aggregate.Group;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexOver;
import org.apache.calcite.rex.RexTableInputRef;
import org.apache.calcite.rex.RexTableInputRef.RelTableRef;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The rule can perform two different optimizations.
 * 1) Removes a join if it does not alter the cardinality of one of its inputs.
 * In particular, this rule is triggered if:
 * - it is a join on PK-FK/UK-FK,
 * - project on top only references columns from the FK side, and
 * - PK/UK side is not filtered
 * It optionally adds an IS NOT NULL filter if any FK column can be nullable.
 * 2) Transforms a left/right outer join into an inner join if:
 * - it is a join on PK-FK/UK-FK,
 * - FK is not nullable
 * - PK/UK side is not filtered
 */
public class HiveJoinConstraintsRule extends RelOptRule {

  protected static final Logger LOG = LoggerFactory.getLogger(HiveJoinConstraintsRule.class);

  public static final HiveJoinConstraintsRule INSTANCE =
      new HiveJoinConstraintsRule(HiveRelFactories.HIVE_BUILDER);


  protected HiveJoinConstraintsRule(RelBuilderFactory relBuilder) {
    super(
        operand(Project.class,
            some(operand(Join.class, any()))),
        relBuilder, "HiveJoinConstraintsRule");
  }

  @Override
  public void onMatch(RelOptRuleCall call) {
    final Project project = call.rel(0);
    Join join = call.rel(1);
    final JoinRelType joinType = join.getJoinType();
    final RelNode leftInput = join.getLeft();
    final RelNode rightInput = join.getRight();

    // 1) If it is an inner, check whether project only uses columns from one side.
    // That side will need to be the FK side.
    // If it is a left outer, left will be the FK side.
    // If it is a right outer, right will be the FK side.
    final RelNode fkInput;
    final RelNode nonFkInput;
    final ImmutableBitSet topRefs =
        RelOptUtil.InputFinder.bits(project.getChildExps(), null);
    final ImmutableBitSet leftBits =
        ImmutableBitSet.range(leftInput.getRowType().getFieldCount());
    final ImmutableBitSet rightBits =
        ImmutableBitSet.range(leftInput.getRowType().getFieldCount(),
            join.getRowType().getFieldCount());
    final boolean leftFK = topRefs.intersects(leftBits);
    final boolean rightFK = topRefs.intersects(rightBits);
    // If the project references both sides (or neither), the join cannot be removed;
    // at most we can turn an outer join into an inner join (TRANSFORM mode).
    final Mode mode = (leftFK && rightFK) || (!leftFK && !rightFK) ?
        Mode.TRANSFORM : Mode.REMOVE;
    switch (joinType) {
    case INNER:
      if (mode == Mode.TRANSFORM) {
        // Bails out as it references columns from both sides (or no columns)
        // and there is nothing to transform
        return;
      }
      fkInput = leftFK ? leftInput : rightInput;
      nonFkInput = leftFK ? rightInput : leftInput;
      break;
    case LEFT:
      fkInput = leftInput;
      nonFkInput = rightInput;
      break;
    case RIGHT:
      fkInput = rightInput;
      nonFkInput = leftInput;
      break;
    default:
      // Other type, bail out
      return;
    }

    // 2) Check whether there is any filtering condition on the
    // non-FK side. Basically we check whether the operators
    // below altered the PK cardinality in any way. Currently
    // we support Project (without windowing functions) and GBy
    // where PK is one of the grouping columns
    final RelMetadataQuery mq = call.getMetadataQuery();
    final Multimap<Class<? extends RelNode>, RelNode> nodesBelowNonFkInput =
        mq.getNodeTypes(nonFkInput);
    for (Entry<Class<? extends RelNode>, Collection<RelNode>> e :
        nodesBelowNonFkInput.asMap().entrySet()) {
      if (e.getKey() != TableScan.class &&
          e.getKey() != Project.class &&
          e.getKey() != Aggregate.class) {
        // Bail out, we cannot rewrite the expression if non-fk side cardinality
        // is being altered
        return;
      }
      if (e.getKey() == TableScan.class) {
        if (e.getValue().size() > 1) {
          // Bail out as we may not have more than one TS on non-FK side
          return;
        }
      } else if (e.getKey() == Project.class) {
        // We check there is no windowing expression
        for (RelNode node : e.getValue()) {
          Project p = (Project) node;
          for (RexNode expr : p.getChildExps()) {
            if (expr instanceof RexOver) {
              // Bail out as it may change cardinality
              return;
            }
          }
        }
      } else if (e.getKey() == Aggregate.class) {
        // We check there are no grouping sets
        for (RelNode node : e.getValue()) {
          Aggregate a = (Aggregate) node;
          if (a.getGroupType() != Group.SIMPLE) {
            // Bail out as it may change cardinality
            return;
          }
        }
      }
    }

    // 3) Check whether there is an FK relationship
    if (join.getJoinType() != JoinRelType.INNER) {
      // If it is not an inner, we transform it as the metadata
      // providers for expressions do not pull information through
      // outer join (as it would not be correct)
      join = (Join) call.builder()
          .push(leftInput).push(rightInput)
          .join(JoinRelType.INNER, join.getCondition())
          .build();
    }
    final RexNode cond = join.getCondition();
    // Maps every table input reference appearing in the join condition back to
    // the original condition operand it came from (used to build IS NOT NULL filters)
    final Map<RexTableInputRef, RexNode> refToRex = new HashMap<>();
    final EquivalenceClasses ec = new EquivalenceClasses();
    for (RexNode conj : RelOptUtil.conjunctions(cond)) {
      if (!conj.isA(SqlKind.EQUALS)) {
        // Not an equality, we bail out
        return;
      }
      RexCall equiCond = (RexCall) conj;
      RexNode eqOp1 = equiCond.getOperands().get(0);
      Set<RexNode> eqOp1ExprsLineage = mq.getExpressionLineage(join, eqOp1);
      if (eqOp1ExprsLineage == null) {
        // Cannot be mapped, bail out
        return;
      }
      RexNode eqOp2 = equiCond.getOperands().get(1);
      Set<RexNode> eqOp2ExprsLineage = mq.getExpressionLineage(join, eqOp2);
      if (eqOp2ExprsLineage == null) {
        // Cannot be mapped, bail out
        return;
      }
      List<RexTableInputRef> eqOp2ExprsFiltered = null;
      for (RexNode eqOpExprLineage1 : eqOp1ExprsLineage) {
        RexTableInputRef inputRef1 = extractTableInputRef(eqOpExprLineage1);
        if (inputRef1 == null) {
          // Bail out as this condition could not be mapped into an input reference
          return;
        }
        refToRex.put(inputRef1, eqOp1);
        if (eqOp2ExprsFiltered == null) {
          // First iteration
          eqOp2ExprsFiltered = new ArrayList<>();
          for (RexNode eqOpExprLineage2 : eqOp2ExprsLineage) {
            RexTableInputRef inputRef2 = extractTableInputRef(eqOpExprLineage2);
            if (inputRef2 == null) {
              // Bail out as this condition could not be mapped into an input reference
              return;
            }
            // Add to list of expressions for follow-up iterations
            eqOp2ExprsFiltered.add(inputRef2);
            // Add to equivalence classes and backwards mapping
            ec.addEquivalenceClass(inputRef1, inputRef2);
            refToRex.put(inputRef2, eqOp2);
          }
        } else {
          // Rest of iterations, only adding, no checking
          for (RexTableInputRef inputRef2 : eqOp2ExprsFiltered) {
            ec.addEquivalenceClass(inputRef1, inputRef2);
          }
        }
      }
    }
    if (ec.getEquivalenceClassesMap().isEmpty()) {
      // This may be a cartesian product, we bail out
      return;
    }

    // 4) Gather all tables from the FK side and the table from the
    // non-FK side
    final Set<RelTableRef> leftTables = mq.getTableReferences(leftInput);
    final Set<RelTableRef> rightTables =
        Sets.difference(mq.getTableReferences(join), mq.getTableReferences(leftInput));
    final Set<RelTableRef> fkTables = leftFK ? leftTables : rightTables;
    final Set<RelTableRef> nonFkTables = leftFK ? rightTables : leftTables;
    // Step 2 guaranteed at most one TableScan on the non-FK side
    assert nonFkTables.size() == 1;
    final RelTableRef nonFkTable = nonFkTables.iterator().next();
    final List<String> nonFkTableQName = nonFkTable.getQualifiedName();

    // 5) For each table, check whether there is a matching on the non-FK side.
    // If there is and it is the only condition, we are ready to transform
    boolean canBeRewritten = false;
    // Condition operands over nullable FK columns; for INNER joins they need an
    // explicit IS NOT NULL filter after the join is removed
    List<RexNode> nullableNodes = new ArrayList<>();
    for (RelTableRef tRef : fkTables) {
      List<RelReferentialConstraint> constraints = tRef.getTable().getReferentialConstraints();
      for (RelReferentialConstraint constraint : constraints) {
        if (constraint.getTargetQualifiedName().equals(nonFkTableQName)) {
          // Work on a copy; note that copy() materializes a distinct set per key,
          // which the pairwise removals below rely on
          EquivalenceClasses ecT = EquivalenceClasses.copy(ec);
          boolean allContained = true;
          for (int pos = 0; pos < constraint.getNumColumns(); pos++) {
            int foreignKeyPos = constraint.getColumnPairs().get(pos).source;
            RelDataType foreignKeyColumnType =
                tRef.getTable().getRowType().getFieldList().get(foreignKeyPos).getType();
            RexTableInputRef foreignKeyColumnRef =
                RexTableInputRef.of(tRef, foreignKeyPos, foreignKeyColumnType);
            if (foreignKeyColumnType.isNullable()) {
              if (joinType == JoinRelType.INNER) {
                // If it is nullable and it is an INNER, we just need a IS NOT NULL filter
                RexNode originalCondOp = refToRex.get(foreignKeyColumnRef);
                assert originalCondOp != null;
                nullableNodes.add(originalCondOp);
              } else {
                // If it is nullable and this is not an INNER, we cannot execute any transformation
                allContained = false;
                break;
              }
            }
            int uniqueKeyPos = constraint.getColumnPairs().get(pos).target;
            RexTableInputRef uniqueKeyColumnRef = RexTableInputRef.of(nonFkTable, uniqueKeyPos,
                nonFkTable.getTable().getRowType().getFieldList().get(uniqueKeyPos).getType());
            if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) &&
                ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) {
              // Remove this condition from eq classes as we have checked that it is present
              // in the join condition
              ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).remove(foreignKeyColumnRef);
              if (ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).size() == 1) { // self
                ecT.getEquivalenceClassesMap().remove(uniqueKeyColumnRef);
              }
              ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).remove(uniqueKeyColumnRef);
              if (ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).size() == 1) { // self
                ecT.getEquivalenceClassesMap().remove(foreignKeyColumnRef);
              }
            } else {
              // No relationship, we cannot do anything
              allContained = false;
              break;
            }
          }
          if (allContained && ecT.getEquivalenceClassesMap().isEmpty()) {
            // We made it
            canBeRewritten = true;
            break;
          }
        }
      }
    }

    // 6) If it is the only condition, we can trigger the rewriting
    if (canBeRewritten) {
      // If we reach here, we trigger the transform
      if (mode == Mode.REMOVE) {
        if (nullableNodes.isEmpty()) {
          call.transformTo(call.builder()
              .push(fkInput)
              .project(project.getChildExps())
              .build());
        } else {
          RexBuilder rexBuilder = project.getCluster().getRexBuilder();
          RexNode newFilterCond;
          if (nullableNodes.size() == 1) {
            newFilterCond = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, nullableNodes.get(0));
          } else {
            List<RexNode> isNotNullConds = new ArrayList<>();
            for (RexNode nullableNode : nullableNodes) {
              isNotNullConds.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, nullableNode));
            }
            newFilterCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, isNotNullConds);
          }
          call.transformTo(call.builder()
              .push(fkInput)
              .filter(newFilterCond)
              .project(project.getChildExps())
              .build());
        }
      } else { // Mode.TRANSFORM
        call.transformTo(call.builder()
            .push(leftInput).push(rightInput)
            .join(JoinRelType.INNER, join.getCondition())
            .project(project.getChildExps())
            .build());
      }
    }
  }

  /**
   * Returns the table input reference behind {@code node}: either the node itself
   * or the operand of a lossless cast over one; {@code null} otherwise.
   */
  private static RexTableInputRef extractTableInputRef(RexNode node) {
    RexTableInputRef ref = null;
    if (node instanceof RexTableInputRef) {
      ref = (RexTableInputRef) node;
    } else if (RexUtil.isLosslessCast(node) &&
        ((RexCall) node).getOperands().get(0) instanceof RexTableInputRef) {
      ref = (RexTableInputRef) ((RexCall) node).getOperands().get(0);
    }
    return ref;
  }

  /**
   * Class representing an equivalence class, i.e., a set of equivalent columns
   *
   * TODO: This is a subset of a private class in materialized view rewriting
   * in Calcite. It should be moved to its own class in Calcite so it can be
   * accessible here.
   */
  private static class EquivalenceClasses {

    // Maps each column reference to its (shared) equivalence class
    private final Map<RexTableInputRef, Set<RexTableInputRef>> nodeToEquivalenceClass;

    protected EquivalenceClasses() {
      nodeToEquivalenceClass = new HashMap<>();
    }

    protected void addEquivalenceClass(RexTableInputRef p1, RexTableInputRef p2) {
      Set<RexTableInputRef> c1 = nodeToEquivalenceClass.get(p1);
      Set<RexTableInputRef> c2 = nodeToEquivalenceClass.get(p2);
      if (c1 != null && c2 != null) {
        // Both present, we need to merge
        if (c1.size() < c2.size()) {
          // We swap them so that c1 is the larger class and c2 is merged into it.
          // (The previous code only reassigned c1 = c2, which left c1 and c2
          // aliased and silently dropped the smaller class from the merge.)
          Set<RexTableInputRef> c2Temp = c2;
          c2 = c1;
          c1 = c2Temp;
        }
        for (RexTableInputRef newRef : c2) {
          c1.add(newRef);
          nodeToEquivalenceClass.put(newRef, c1);
        }
      } else if (c1 != null) {
        // p1 present, we need to merge into it
        c1.add(p2);
        nodeToEquivalenceClass.put(p2, c1);
      } else if (c2 != null) {
        // p2 present, we need to merge into it
        c2.add(p1);
        nodeToEquivalenceClass.put(p1, c2);
      } else {
        // None are present, add to same equivalence class
        Set<RexTableInputRef> equivalenceClass = new LinkedHashSet<>();
        equivalenceClass.add(p1);
        equivalenceClass.add(p2);
        nodeToEquivalenceClass.put(p1, equivalenceClass);
        nodeToEquivalenceClass.put(p2, equivalenceClass);
      }
    }

    protected Map<RexTableInputRef, Set<RexTableInputRef>> getEquivalenceClassesMap() {
      return nodeToEquivalenceClass;
    }

    // Deep copy: each key gets its own set copy (callers mutate these independently)
    protected static EquivalenceClasses copy(EquivalenceClasses ec) {
      final EquivalenceClasses newEc = new EquivalenceClasses();
      for (Entry<RexTableInputRef, Set<RexTableInputRef>> e : ec.nodeToEquivalenceClass.entrySet()) {
        newEc.nodeToEquivalenceClass.put(e.getKey(), Sets.newLinkedHashSet(e.getValue()));
      }
      return newEc;
    }
  }

  private enum Mode {
    // Removes join operator from the plan
    REMOVE,
    // Transforms LEFT/RIGHT outer join into INNER join
    TRANSFORM
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;

import org.apache.calcite.rel.rules.ProjectJoinTransposeRule;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;

/**
 * Hive flavor of Calcite's {@link ProjectJoinTransposeRule}, pushing a
 * {@code Project} past a {@code Join}, constructed with Hive's RelBuilder
 * ({@link HiveRelFactories#HIVE_BUILDER}) so the rewritten operators are
 * Hive RelNodes.
 *
 * NOTE(review): the {@code expr -> true} condition marks every expression for
 * preservation above the join — presumably so that only bare input references
 * are pushed below it; confirm against Calcite's PushProjector semantics.
 */
public class HiveProjectJoinTransposeRule extends ProjectJoinTransposeRule {

  /** Singleton instance used by the planner (see CalcitePlanner). */
  public static final HiveProjectJoinTransposeRule INSTANCE =
      new HiveProjectJoinTransposeRule(HiveRelFactories.HIVE_BUILDER);

  private HiveProjectJoinTransposeRule(RelBuilderFactory relBuilderFactory) {
    super(expr -> true, relBuilderFactory);
  }

}
org.apache.calcite.sql.SqlSampleSpec; import org.apache.calcite.sql.SqlWindow; import org.apache.calcite.sql.dialect.HiveSqlDialect; import org.apache.calcite.sql.parser.SqlParserPos; @@ -114,8 +113,6 @@ import org.apache.calcite.util.Pair; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.ObjectPair; -import org.apache.hadoop.hive.common.ValidTxnList; -import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -191,6 +188,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveIntersectRewriteRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinConstraintsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; @@ -198,6 +196,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectFilterPullUpConstantsRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectJoinTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectOverIntersectRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule; @@ -1677,7 +1676,8 @@ private RowResolver genRowResolver(Operator op, QB qb) { private enum ExtendedCBOProfile { JOIN_REORDERING, - WINDOWING_POSTPROCESSING; + 
WINDOWING_POSTPROCESSING, + REFERENTIAL_CONSTRAINTS; } /** @@ -1806,9 +1806,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu } // 4.1 Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin. // Don't run this rule if hive is to remove sq_count_check since that rule expects to have project b/w join. - calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider, - HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFF_PROJECT_BTW_JOIN, - HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN); + calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider, + HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFF_PROJECT_BTW_JOIN, + HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN); // 4.2 Apply join order optimizations: reordering MST algorithm // If join optimizations failed because of missing stats, we continue with @@ -2073,6 +2073,10 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv rules.add(new HivePointLookupOptimizerRule.FilterCondition(minNumORClauses)); rules.add(new HivePointLookupOptimizerRule.JoinCondition(minNumORClauses)); } + if (profilesCBO.contains(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS)) { + rules.add(HiveProjectJoinTransposeRule.INSTANCE); + rules.add(HiveJoinConstraintsRule.INSTANCE); + } rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN); rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN); rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN); @@ -2135,7 +2139,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv // 8. 
Merge, remove and reduce Project if possible perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, - HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE, HiveSortMergeRule.INSTANCE); + HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE, HiveSortMergeRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Merge Project-Project, Merge Sort-Sort"); @@ -2894,6 +2898,9 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, partitionCache, colStatsCache, noColsMissingStats); + if (!optTable.getReferentialConstraints().isEmpty()) { + profilesCBO.add(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS); + } final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, null == tableAlias ? tabMetaData.getTableName() : tableAlias, getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf, @@ -2905,7 +2912,9 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, partitionCache, colStatsCache, noColsMissingStats); - + if (!optTable.getReferentialConstraints().isEmpty()) { + profilesCBO.add(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS); + } final HiveTableScan hts = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, null == tableAlias ? 
tabMetaData.getTableName() : tableAlias, @@ -2946,6 +2955,9 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, partitionCache, colStatsCache, noColsMissingStats); + if (!optTable.getReferentialConstraints().isEmpty()) { + profilesCBO.add(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS); + } // Build Hive Table Scan Rel tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, null == tableAlias ? tabMetaData.getTableName() : tableAlias, diff --git a/ql/src/test/queries/clientpositive/join_constraints_optimization.q b/ql/src/test/queries/clientpositive/join_constraints_optimization.q new file mode 100644 index 0000000000..f3e1fbe1ab --- /dev/null +++ b/ql/src/test/queries/clientpositive/join_constraints_optimization.q @@ -0,0 +1,130 @@ +set hive.strict.checks.cartesian.product=false; + +CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +CREATE TABLE `lineorder_removal_n0`( + `lo_orderkey` bigint, + `lo_linenumber` 
int, + `lo_custkey` bigint not null disable rely, + `lo_partkey` bigint not null disable rely, + `lo_suppkey` bigint not null disable rely, + `lo_orderdate` bigint, + `lo_ordpriority` string, + `lo_shippriority` string, + `lo_quantity` double, + `lo_extendedprice` double, + `lo_ordtotalprice` double, + `lo_discount` double, + `lo_revenue` double, + `lo_supplycost` double, + `lo_tax` double, + `lo_commitdate` bigint, + `lo_shipmode` string, + primary key (`lo_orderkey`) disable rely, + constraint fk1 foreign key (`lo_custkey`) references `customer_removal_n0`(`c_custkey`) disable rely, + constraint fk2 foreign key (`lo_orderdate`) references `dates_removal_n0`(`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +-- CAN BE REMOVED AND DOES NOT NEED FILTER ON JOIN COLUMN +-- AS COLUMN IS ALREADY NOT NULLABLE +EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`; + +-- CAN BE REMOVED AND INTRODUCES A FILTER ON JOIN COLUMN +EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`; + +-- REMOVES THE JOIN +EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`; + +-- TRANSFORMS THE JOIN +EXPLAIN +SELECT `lo_linenumber`, `c_region` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`; + +-- NOT TRANSFORMED INTO INNER JOIN SINCE JOIN COLUMN IS NULLABLE +EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`; + +-- REMOVES BOTH JOINS +EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`; + +-- REMOVES BOTH JOINS +EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN 
`customer_removal_n0` ON `lo_custkey` = `c_custkey` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`; + +-- REMOVE INNER AND NOT TRANFORM OUTER +EXPLAIN +SELECT `lo_linenumber` FROM +(SELECT * +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`) subq +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`; + +-- REMOVE FIRST OUTER AND NOT TRANFORM SECOND OUTER +EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`; + +-- NOT TRANFORM EITHER +EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`; diff --git a/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out new file mode 100644 index 0000000000..7df8dbb3cd --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out @@ -0,0 +1,721 @@ +PREHOOK: query: CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@customer_removal_n0 +PREHOOK: query: CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dates_removal_n0 +PREHOOK: query: CREATE TABLE `lineorder_removal_n0`( + `lo_orderkey` bigint, + `lo_linenumber` int, + `lo_custkey` bigint not null disable rely, + `lo_partkey` bigint not null disable rely, + `lo_suppkey` bigint not null disable rely, + `lo_orderdate` bigint, + `lo_ordpriority` string, + `lo_shippriority` string, + `lo_quantity` double, + `lo_extendedprice` double, + `lo_ordtotalprice` double, + `lo_discount` double, + `lo_revenue` double, + `lo_supplycost` double, + `lo_tax` double, + `lo_commitdate` bigint, + `lo_shipmode` string, + primary key (`lo_orderkey`) disable rely, + constraint fk1 
foreign key (`lo_custkey`) references `customer_removal_n0`(`c_custkey`) disable rely, + constraint fk2 foreign key (`lo_orderdate`) references `dates_removal_n0`(`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineorder_removal_n0 +POSTHOOK: query: CREATE TABLE `lineorder_removal_n0`( + `lo_orderkey` bigint, + `lo_linenumber` int, + `lo_custkey` bigint not null disable rely, + `lo_partkey` bigint not null disable rely, + `lo_suppkey` bigint not null disable rely, + `lo_orderdate` bigint, + `lo_ordpriority` string, + `lo_shippriority` string, + `lo_quantity` double, + `lo_extendedprice` double, + `lo_ordtotalprice` double, + `lo_discount` double, + `lo_revenue` double, + `lo_supplycost` double, + `lo_tax` double, + `lo_commitdate` bigint, + `lo_shipmode` string, + primary key (`lo_orderkey`) disable rely, + constraint fk1 foreign key (`lo_custkey`) references `customer_removal_n0`(`c_custkey`) disable rely, + constraint fk2 foreign key (`lo_orderdate`) references `dates_removal_n0`(`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineorder_removal_n0 +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: lineorder_removal_n0 + Select Operator + expressions: lo_linenumber (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM 
`lineorder_removal_n0` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: lineorder_removal_n0 + filterExpr: lo_orderdate is not null (type: boolean) + Filter Operator + predicate: lo_orderdate is not null (type: boolean) + Select Operator + expressions: lo_linenumber (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: lineorder_removal_n0 + Select Operator + expressions: lo_linenumber (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber`, `c_region` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber`, `c_region` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineorder_removal_n0 + Statistics: Num rows: 1 
Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lo_linenumber (type: int), lo_custkey (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: customer_removal_n0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_custkey (type: bigint), c_region (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineorder_removal_n0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lo_linenumber (type: int), lo_orderdate (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 
2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: lineorder_removal_n0 + filterExpr: lo_orderdate is not null (type: boolean) + Filter Operator + predicate: lo_orderdate is not null (type: boolean) + Select Operator + expressions: lo_linenumber (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + 
Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: lineorder_removal_n0 + filterExpr: lo_orderdate is not null (type: boolean) + Filter Operator + predicate: lo_orderdate is not null (type: boolean) + Select Operator + expressions: lo_linenumber (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` FROM +(SELECT * +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`) subq +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` FROM +(SELECT * +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`) subq +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineorder_removal_n0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lo_linenumber (type: int), lo_orderdate (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey 
(type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineorder_removal_n0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + 
Select Operator + expressions: lo_linenumber (type: int), lo_orderdate (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = 
`c_custkey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineorder_removal_n0 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lo_linenumber (type: int), lo_custkey (type: bigint), lo_orderdate (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: bigint) + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: customer_removal_n0 + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +