diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 0bfee2e43e..eaa10fe203 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2218,6 +2218,9 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "When hive.optimize.limittranspose is true, this variable specifies the minimal reduction in the\n" +
         "number of tuples of the outer input of the join or the input of the union that you should get in order to apply the rule."),
+    HIVE_OPTIMIZE_CONSTRAINTS_JOIN("hive.optimize.constraints.join", true, "Whether to use referential constraints\n" +
+        "to optimize (remove or transform) join operators"),
+
     HIVE_OPTIMIZE_REDUCE_WITH_STATS("hive.optimize.filter.stats.reduction", false, "Whether to simplify comparison\n" +
         "expressions in filter operators using column stats"),
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 3a5aec7d6b..0bb91f37a1 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -529,6 +529,7 @@ minillaplocal.query.files=\
   jdbc_handler.q,\
   join1.q,\
   join_acid_non_acid.q,\
+  join_constraints_optimization.q,\
   join_filters.q,\
   join_max_hashtable.q,\
   join_nulls.q,\
@@ -582,6 +583,8 @@ minillaplocal.query.files=\
   materialized_view_rewrite_8.q,\
   materialized_view_rewrite_9.q,\
   materialized_view_rewrite_10.q,\
+  materialized_view_rewrite_no_join_opt.q,\
+  materialized_view_rewrite_no_join_opt_2.q,\
   materialized_view_rewrite_part_1.q,\
   materialized_view_rewrite_part_2.q,\
   materialized_view_rewrite_ssb.q,\
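[NOTE — annotation, not part of the patch: hive.optimize.constraints.join defaults to true, so the constraint-based join optimization is on by default. The two new materialized_view_rewrite_no_join_opt tests added above run the materialized-view suite with it disabled so their original plans are preserved. A session toggles it the same way those tests do, for example:

    set hive.optimize.constraints.join=false;
    -- constraints are only trusted when declared RELY, e.g. (t and d are placeholder tables):
    -- alter table t add constraint fk1 foreign key (k) references d(k) disable novalidate rely;
    set hive.optimize.constraints.join=true;
]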
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index 268284a6da..dc0a84b37d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -17,17 +17,26 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite;
+import com.google.common.collect.Multimap;
 import java.util.AbstractList;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 import com.google.common.collect.ImmutableList;
+import java.util.Map.Entry;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Aggregate.Group;
 import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexBuilder;
@@ -35,6 +44,7 @@ import org.apache.calcite.rex.RexFieldAccess;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
@@ -460,4 +470,58 @@ public static RelNode createSingleValueAggRel(
     return aggregateFactory.createAggregate(rel, false, ImmutableBitSet.of(), null, aggCalls);
   }
+
+  /**
+   * Given a RelNode, checks whether there is any filtering condition
+   * below it. Basically we check whether the operators
+   * below altered the PK cardinality in any way.
+   */
+  public static boolean isRowFilteringPlan(final RelMetadataQuery mq, RelNode operator) {
+    final Multimap<Class<? extends RelNode>, RelNode> nodesBelowNonFkInput =
+        mq.getNodeTypes(operator);
+    for (Entry<Class<? extends RelNode>, Collection<RelNode>> e :
+        nodesBelowNonFkInput.asMap().entrySet()) {
+      if (e.getKey() == TableScan.class) {
+        if (e.getValue().size() > 1) {
+          // Bail out, as there must not be more than one TS on the non-FK side
+          return true;
+        }
+      } else if (e.getKey() == Project.class) {
+        // We check that there is no windowing expression
+        for (RelNode node : e.getValue()) {
+          Project p = (Project) node;
+          for (RexNode expr : p.getChildExps()) {
+            if (expr instanceof RexOver) {
+              // Bail out as it may change cardinality
+              return true;
+            }
+          }
+        }
+      } else if (e.getKey() == Aggregate.class) {
+        // We check that there are no grouping sets
+        for (RelNode node : e.getValue()) {
+          Aggregate a = (Aggregate) node;
+          if (a.getGroupType() != Group.SIMPLE) {
+            // Bail out as it may change cardinality
+            return true;
+          }
+        }
+      } else if (e.getKey() == Sort.class) {
+        // We check whether there is a limit clause
+        for (RelNode node : e.getValue()) {
+          Sort s = (Sort) node;
+          if (s.fetch != null || s.offset != null) {
+            // Bail out as it may change cardinality
+            return true;
+          }
+        }
+      } else {
+        // Bail out, we cannot rewrite the expression if the non-FK side cardinality
+        // is being altered
+        return true;
+      }
+    }
+    // It passed all the tests
+    return false;
+  }
 }
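[NOTE — annotation, not part of the patch: isRowFilteringPlan guards the join removal below; any operator that can change the row count of the PK/UK side defeats the rewrite, since a dropped PK row could silently drop matching FK rows. An illustrative sketch, assuming the tables created in join_constraints_optimization.q further down:

    -- The LIMIT on the PK side (customer_removal_n0) may change its cardinality,
    -- so isRowFilteringPlan returns true and the PK-FK join cannot be removed.
    EXPLAIN
    SELECT lo_linenumber
    FROM lineorder_removal_n0
    JOIN (SELECT * FROM customer_removal_n0 LIMIT 10) c
      ON lo_custkey = c.c_custkey;
]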
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
new file mode 100644
index 0000000000..161fd0ee0d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
@@ -0,0 +1,496 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelReferentialConstraint;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Aggregate.Group;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.rex.RexPermuteInputsShuttle;
+import org.apache.calcite.rex.RexTableInputRef;
+import org.apache.calcite.rex.RexTableInputRef.RelTableRef;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.MappingType;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The rule can perform two different optimizations.
+ * 1) Removes a join if it does not alter the cardinality of one of its inputs.
+ * In particular, this rule is triggered if:
+ * - it is a join on PK-FK/UK-FK,
+ * - the project on top only references columns from the FK side, and
+ * - the PK/UK side is not filtered.
+ * It optionally adds an IS NOT NULL filter if any FK column can be nullable.
+ * 2) Transforms a left/right outer join into an inner join if:
+ * - it is a join on PK-FK/UK-FK,
+ * - the FK is not nullable, and
+ * - the PK/UK side is not filtered.
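+ *
+ * For intuition, a sketch using the tables defined in the accompanying
+ * join_constraints_optimization.q test (FK lo_custkey references PK c_custkey,
+ * both declared RELY, and lo_custkey is NOT NULL):
+ *   SELECT lo_linenumber FROM lineorder_removal_n0
+ *     JOIN customer_removal_n0 ON lo_custkey = c_custkey
+ * falls under (1) and the join can be dropped entirely, whereas
+ *   SELECT lo_linenumber, c_region FROM lineorder_removal_n0
+ *     LEFT OUTER JOIN customer_removal_n0 ON lo_custkey = c_custkey
+ * falls under (2) and becomes an inner join.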
+ */
+public class HiveJoinConstraintsRule extends RelOptRule {
+
+  protected static final Logger LOG = LoggerFactory.getLogger(HiveJoinConstraintsRule.class);
+
+  public static final HiveJoinConstraintsRule INSTANCE =
+      new HiveJoinConstraintsRule(HiveRelFactories.HIVE_BUILDER);
+
+  protected HiveJoinConstraintsRule(RelBuilderFactory relBuilder) {
+    super(
+        operand(Project.class,
+            some(operand(Join.class, any()))),
+        relBuilder, "HiveJoinConstraintsRule");
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final Project project = call.rel(0);
+    final RexBuilder rexBuilder = project.getCluster().getRexBuilder();
+    List<RexNode> topProjExprs = project.getChildExps();
+    Join join = call.rel(1);
+    final JoinRelType joinType = join.getJoinType();
+    final RelNode leftInput = join.getLeft();
+    final RelNode rightInput = join.getRight();
+    final RexNode cond = join.getCondition();
+
+    // 1) If it is an inner, check whether the project only uses columns from one side.
+    // That side will need to be the FK side.
+    // If it is a left outer, left will be the FK side.
+    // If it is a right outer, right will be the FK side.
+    final RelNode fkInput;
+    final RelNode nonFkInput;
+    final ImmutableBitSet topRefs =
+        RelOptUtil.InputFinder.bits(topProjExprs, null);
+    final ImmutableBitSet leftBits =
+        ImmutableBitSet.range(leftInput.getRowType().getFieldCount());
+    final ImmutableBitSet rightBits =
+        ImmutableBitSet.range(leftInput.getRowType().getFieldCount(),
+            join.getRowType().getFieldCount());
+    // These booleans indicate whether the left and right input, respectively, is a potential FK
+    boolean leftInputPotentialFK = topRefs.intersects(leftBits);
+    boolean rightInputPotentialFK = topRefs.intersects(rightBits);
+    if (leftInputPotentialFK && rightInputPotentialFK && joinType == JoinRelType.INNER) {
+      // Both inputs are referenced. Before making a decision, try to swap
+      // references in the join condition if it is an inner join, i.e. if a join
+      // condition column is referenced above the join, then we can just
+      // reference the column from the other side.
+      // For example, given two relations R(a1,a2), S(b1):
+      //   SELECT a2, b1 FROM R, S ON R.a1=S.b1 =>
+      //   SELECT a2, a1 FROM R, S ON R.a1=S.b1
+      int joinFieldCount = join.getRowType().getFieldCount();
+      Mapping mappingLR = Mappings.create(MappingType.PARTIAL_FUNCTION, joinFieldCount, joinFieldCount);
+      Mapping mappingRL = Mappings.create(MappingType.PARTIAL_FUNCTION, joinFieldCount, joinFieldCount);
+      for (RexNode conj : RelOptUtil.conjunctions(cond)) {
+        if (!conj.isA(SqlKind.EQUALS)) {
+          continue;
+        }
+        RexCall eq = (RexCall) conj;
+        RexNode op1 = eq.getOperands().get(0);
+        RexNode op2 = eq.getOperands().get(1);
+        if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
+          // Check references
+          int ref1 = ((RexInputRef) op1).getIndex();
+          int ref2 = ((RexInputRef) op2).getIndex();
+          int leftRef = -1;
+          int rightRef = -1;
+          if (leftBits.get(ref1) && rightBits.get(ref2)) {
+            leftRef = ref1;
+            rightRef = ref2;
+          } else if (rightBits.get(ref1) && leftBits.get(ref2)) {
+            leftRef = ref2;
+            rightRef = ref1;
+          }
+          if (leftRef != -1 && rightRef != -1) {
+            // We do not add more than one mapping per source,
+            // as it is useless
+            if (mappingLR.getTargetOpt(leftRef) == -1) {
+              mappingLR.set(leftRef, rightRef);
+            }
+            if (mappingRL.getTargetOpt(rightRef) == -1) {
+              mappingRL.set(rightRef, leftRef);
+            }
+          }
+        }
+      }
+      if (mappingLR.size() != 0) {
+        // First, insert missing elements into the mapping as identity mappings
+        for (int i = 0; i < joinFieldCount; i++) {
+          if (mappingLR.getTargetOpt(i) == -1) {
+            mappingLR.set(i, i);
+          }
+          if (mappingRL.getTargetOpt(i) == -1) {
+            mappingRL.set(i, i);
+          }
+        }
+        // Then, we start by trying to reference only the left side in top projections
+        List<RexNode> swappedTopProjExprs = topProjExprs.stream()
+            .map(projExpr -> projExpr.accept(new RexPermuteInputsShuttle(mappingRL, call.rel(1))))
+            .collect(Collectors.toList());
+        rightInputPotentialFK = RelOptUtil.InputFinder.bits(swappedTopProjExprs, null).intersects(rightBits);
+        if (!rightInputPotentialFK) {
+          topProjExprs = swappedTopProjExprs;
+        } else {
+          // If it did not work, we try to reference only the right side in top projections
+          swappedTopProjExprs = topProjExprs.stream()
+              .map(projExpr -> projExpr.accept(new RexPermuteInputsShuttle(mappingLR, call.rel(1))))
+              .collect(Collectors.toList());
+          leftInputPotentialFK = RelOptUtil.InputFinder.bits(swappedTopProjExprs, null).intersects(leftBits);
+          if (!leftInputPotentialFK) {
+            topProjExprs =
+                swappedTopProjExprs;
+          }
+        }
+      }
+    } else if (!leftInputPotentialFK && !rightInputPotentialFK) {
+      // TODO: There are no references in the project operator above.
+      // In this case, we should probably do two passes, one for
+      // left as FK and one for right as FK, although it may be expensive.
+      // Currently we only assume left as FK
+      leftInputPotentialFK = true;
+    }
+
+    final Mode mode;
+    switch (joinType) {
+      case INNER:
+        if (leftInputPotentialFK && rightInputPotentialFK) {
+          // Bail out, as the project references columns from both sides (or no columns)
+          // and there is nothing to transform
+          return;
+        }
+        fkInput = leftInputPotentialFK ? leftInput : rightInput;
+        nonFkInput = leftInputPotentialFK ? rightInput : leftInput;
+        mode = Mode.REMOVE;
+        break;
+      case LEFT:
+        fkInput = leftInput;
+        nonFkInput = rightInput;
+        mode = leftInputPotentialFK && !rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM;
+        break;
+      case RIGHT:
+        fkInput = rightInput;
+        nonFkInput = leftInput;
+        mode = !leftInputPotentialFK && rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM;
+        break;
+      default:
+        // Other type, bail out
+        return;
+    }
+
+    // 2) Check whether there is any filtering condition on the
+    // non-FK side. Basically we check whether the operators
+    // below altered the PK cardinality in any way
+    final RelMetadataQuery mq = call.getMetadataQuery();
+    if (HiveRelOptUtil.isRowFilteringPlan(mq, nonFkInput)) {
+      return;
+    }
+
+    // 3) Check whether there is an FK relationship
+    if (join.getJoinType() != JoinRelType.INNER) {
+      // If it is not an inner, we transform it as the metadata
+      // providers for expressions do not pull information through
+      // outer join (as it would not be correct)
+      join = (Join) call.builder()
+          .push(leftInput).push(rightInput)
+          .join(JoinRelType.INNER, cond)
+          .build();
+    }
+    final Map<RexTableInputRef, RexNode> refToRex = new HashMap<>();
+    final EquivalenceClasses ec = new EquivalenceClasses();
+    for (RexNode conj : RelOptUtil.conjunctions(cond)) {
+      if (!conj.isA(SqlKind.EQUALS)) {
+        // Not an equality, we bail out
+        return;
+      }
+      RexCall equiCond = (RexCall) conj;
+      RexNode eqOp1 = equiCond.getOperands().get(0);
+      Set<RexNode> eqOp1ExprsLineage = mq.getExpressionLineage(join, eqOp1);
+      if (eqOp1ExprsLineage == null) {
+        // Cannot be mapped, bail out
+        return;
+      }
+      RexNode eqOp2 = equiCond.getOperands().get(1);
+      Set<RexNode> eqOp2ExprsLineage = mq.getExpressionLineage(join, eqOp2);
+      if (eqOp2ExprsLineage == null) {
+        // Cannot be mapped, bail out
+        return;
+      }
+      List<RexTableInputRef> eqOp2ExprsFiltered = null;
+      for (RexNode eqOpExprLineage1 : eqOp1ExprsLineage) {
+        RexTableInputRef inputRef1 = extractTableInputRef(eqOpExprLineage1);
+        if (inputRef1 == null) {
+          // Bail out, as this condition could not be mapped into an input reference
+          return;
+        }
+        refToRex.put(inputRef1, eqOp1);
+        if (eqOp2ExprsFiltered == null) {
+          // First iteration
+          eqOp2ExprsFiltered = new ArrayList<>();
+          for (RexNode eqOpExprLineage2 : eqOp2ExprsLineage) {
+            RexTableInputRef inputRef2 = extractTableInputRef(eqOpExprLineage2);
+            if (inputRef2 == null) {
+              // Bail out, as this condition could not be mapped into an input reference
+              return;
+            }
+            // Add to list of expressions for follow-up iterations
+            eqOp2ExprsFiltered.add(inputRef2);
+            // Add to equivalence classes and backwards mapping
+            ec.addEquivalenceClass(inputRef1, inputRef2);
+            refToRex.put(inputRef2, eqOp2);
+          }
+        } else {
+          // Rest of iterations, only adding, no checking
+          for (RexTableInputRef inputRef2 : eqOp2ExprsFiltered) {
+            ec.addEquivalenceClass(inputRef1, inputRef2);
+          }
+        }
+      }
+    }
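+    // Worked example (names from the accompanying q test, for illustration only):
+    // for a condition such as lo_custkey = c_custkey, getExpressionLineage resolves
+    // each operand to table-level references (RexTableInputRef) tied to
+    // lineorder_removal_n0 and customer_removal_n0, and the pair is recorded as an
+    // equivalence class so that it can later be matched against a declared
+    // FK-PK/UK constraint.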
+    if (ec.getEquivalenceClassesMap().isEmpty()) {
+      // This may be a cartesian product, we bail out
+      return;
+    }
+
+    // 4) Gather all tables from the FK side and the table from the
+    // non-FK side
+    final Set<RelTableRef> leftTables = mq.getTableReferences(leftInput);
+    final Set<RelTableRef> rightTables =
+        Sets.difference(mq.getTableReferences(join), mq.getTableReferences(leftInput));
+    final Set<RelTableRef> fkTables = leftInputPotentialFK ? leftTables : rightTables;
+    final Set<RelTableRef> nonFkTables = leftInputPotentialFK ? rightTables : leftTables;
+    assert nonFkTables.size() == 1;
+    final RelTableRef nonFkTable = nonFkTables.iterator().next();
+    final List<String> nonFkTableQName = nonFkTable.getQualifiedName();
+
+    // 5) For each table, check whether there is a matching constraint with the non-FK side.
+    // If there is one and it is the only condition, we are ready to transform
+    boolean canBeRewritten = false;
+    List<RexNode> nullableNodes = new ArrayList<>();
+    for (RelTableRef tRef : fkTables) {
+      List<RelReferentialConstraint> constraints = tRef.getTable().getReferentialConstraints();
+      for (RelReferentialConstraint constraint : constraints) {
+        if (constraint.getTargetQualifiedName().equals(nonFkTableQName)) {
+          EquivalenceClasses ecT = EquivalenceClasses.copy(ec);
+          boolean allContained = true;
+          for (int pos = 0; pos < constraint.getNumColumns(); pos++) {
+            int foreignKeyPos = constraint.getColumnPairs().get(pos).source;
+            RelDataType foreignKeyColumnType =
+                tRef.getTable().getRowType().getFieldList().get(foreignKeyPos).getType();
+            RexTableInputRef foreignKeyColumnRef =
+                RexTableInputRef.of(tRef, foreignKeyPos, foreignKeyColumnType);
+            if (foreignKeyColumnType.isNullable()) {
+              if (joinType == JoinRelType.INNER) {
+                // If it is nullable and it is an INNER, we just need an IS NOT NULL filter
+                RexNode originalCondOp = refToRex.get(foreignKeyColumnRef);
+                assert originalCondOp != null;
+                nullableNodes.add(originalCondOp);
+              } else {
+                // If it is nullable and this is not an INNER, we cannot execute any transformation
+                allContained = false;
+                break;
+              }
+            }
+            int uniqueKeyPos = constraint.getColumnPairs().get(pos).target;
+            RexTableInputRef uniqueKeyColumnRef = RexTableInputRef.of(nonFkTable, uniqueKeyPos,
+                nonFkTable.getTable().getRowType().getFieldList().get(uniqueKeyPos).getType());
+            if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) &&
+                ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) {
+              // Remove this condition from the eq classes, as we have checked that it is present
+              // in the join condition
+              ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).remove(foreignKeyColumnRef);
+              if (ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).size() == 1) { // self
+                ecT.getEquivalenceClassesMap().remove(uniqueKeyColumnRef);
+              }
+              ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).remove(uniqueKeyColumnRef);
+              if (ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).size() == 1) { // self
+                ecT.getEquivalenceClassesMap().remove(foreignKeyColumnRef);
+              }
+            } else {
+              // No relationship, we cannot do anything
+              allContained = false;
+              break;
+            }
+          }
+          if (allContained && ecT.getEquivalenceClassesMap().isEmpty()) {
+            // We made it
+            canBeRewritten = true;
+            break;
+          }
+        }
+      }
+    }
+
+    // 6) If it is the only condition, we can trigger the rewriting
+    if (canBeRewritten) {
+      // If we reach here, we trigger the transform
+      if (mode == Mode.REMOVE) {
+        if (rightInputPotentialFK) {
+          // First, if the FK is the right input, we need to shift
+          nullableNodes = nullableNodes.stream()
+              .map(node -> RexUtil.shift(node, 0,
+                  -leftInput.getRowType().getFieldCount()))
+              .collect(Collectors.toList());
+          topProjExprs = topProjExprs.stream()
+              .map(node -> RexUtil.shift(node, 0, -leftInput.getRowType().getFieldCount()))
+              .collect(Collectors.toList());
+        }
+        // Fix nullability in references to the input node
+        topProjExprs = RexUtil.fixUp(rexBuilder, topProjExprs, RelOptUtil.getFieldTypeList(fkInput.getRowType()));
+        // Trigger transformation
+        if (nullableNodes.isEmpty()) {
+          call.transformTo(call.builder()
+              .push(fkInput)
+              .project(topProjExprs)
+              .convert(project.getRowType(), false)
+              .build());
+        } else {
+          RexNode newFilterCond;
+          if (nullableNodes.size() == 1) {
+            newFilterCond = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, nullableNodes.get(0));
+          } else {
+            List<RexNode> isNotNullConds = new ArrayList<>();
+            for (RexNode nullableNode : nullableNodes) {
+              isNotNullConds.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, nullableNode));
+            }
+            newFilterCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, isNotNullConds);
+          }
+          call.transformTo(call.builder()
+              .push(fkInput)
+              .filter(newFilterCond)
+              .project(topProjExprs)
+              .convert(project.getRowType(), false)
+              .build());
+        }
+      } else { // Mode.TRANSFORM
+        // Trigger transformation
+        call.transformTo(call.builder()
+            .push(leftInput).push(rightInput)
+            .join(JoinRelType.INNER, join.getCondition())
+            .project(project.getChildExps())
+            .build());
+      }
+    }
+  }
+
+  private static RexTableInputRef extractTableInputRef(RexNode node) {
+    RexTableInputRef ref = null;
+    if (node instanceof RexTableInputRef) {
+      ref = (RexTableInputRef) node;
+    } else if (RexUtil.isLosslessCast(node) &&
+        ((RexCall) node).getOperands().get(0) instanceof RexTableInputRef) {
+      ref = (RexTableInputRef) ((RexCall) node).getOperands().get(0);
+    }
+    return ref;
+  }
+
+  /**
+   * Class representing an equivalence class, i.e., a set of equivalent columns.
+   *
+   * TODO: This is a subset of a private class in the materialized view rewriting
+   * in Calcite. It should be moved to its own class in Calcite so it can be
+   * accessible here.
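+   *
+   * A small worked example (hypothetical refs): after addEquivalenceClass(a.x, b.y)
+   * followed by addEquivalenceClass(b.y, c.z), the classes {a.x, b.y} and {b.y, c.z}
+   * are merged, and all three refs map to the single class {a.x, b.y, c.z}.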
+   */
+  private static class EquivalenceClasses {
+
+    private final Map<RexTableInputRef, Set<RexTableInputRef>> nodeToEquivalenceClass;
+
+    protected EquivalenceClasses() {
+      nodeToEquivalenceClass = new HashMap<>();
+    }
+
+    protected void addEquivalenceClass(RexTableInputRef p1, RexTableInputRef p2) {
+      Set<RexTableInputRef> c1 = nodeToEquivalenceClass.get(p1);
+      Set<RexTableInputRef> c2 = nodeToEquivalenceClass.get(p2);
+      if (c1 != null && c2 != null) {
+        // Both present, we need to merge
+        if (c1.size() < c2.size()) {
+          // We swap them so that the smaller class is merged into the larger one
+          Set<RexTableInputRef> cTemp = c2;
+          c2 = c1;
+          c1 = cTemp;
+        }
+        for (RexTableInputRef newRef : c2) {
+          c1.add(newRef);
+          nodeToEquivalenceClass.put(newRef, c1);
+        }
+      } else if (c1 != null) {
+        // p1 present, we need to merge into it
+        c1.add(p2);
+        nodeToEquivalenceClass.put(p2, c1);
+      } else if (c2 != null) {
+        // p2 present, we need to merge into it
+        c2.add(p1);
+        nodeToEquivalenceClass.put(p1, c2);
+      } else {
+        // Neither is present, add both to the same new equivalence class
+        Set<RexTableInputRef> equivalenceClass = new LinkedHashSet<>();
+        equivalenceClass.add(p1);
+        equivalenceClass.add(p2);
+        nodeToEquivalenceClass.put(p1, equivalenceClass);
+        nodeToEquivalenceClass.put(p2, equivalenceClass);
+      }
+    }
+
+    protected Map<RexTableInputRef, Set<RexTableInputRef>> getEquivalenceClassesMap() {
+      return nodeToEquivalenceClass;
+    }
+
+    protected static EquivalenceClasses copy(EquivalenceClasses ec) {
+      final EquivalenceClasses newEc = new EquivalenceClasses();
+      for (Entry<RexTableInputRef, Set<RexTableInputRef>> e : ec.nodeToEquivalenceClass.entrySet()) {
+        newEc.nodeToEquivalenceClass.put(e.getKey(), Sets.newLinkedHashSet(e.getValue()));
+      }
+      return newEc;
+    }
+  }
+
+  private enum Mode {
+    // Removes the join operator from the plan
+    REMOVE,
+    // Transforms a LEFT/RIGHT outer join into an INNER join
+    TRANSFORM
+  }
+}
\ No newline at end of file
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java
new file mode 100644
index 0000000000..43c78968d0
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.rel.rules.ProjectJoinTransposeRule;
+import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+
+/** Hive flavor of Calcite's ProjectJoinTransposeRule, instantiated with Hive's relational factories. */
+public class HiveProjectJoinTransposeRule extends ProjectJoinTransposeRule {
+
+  public static final HiveProjectJoinTransposeRule INSTANCE =
+      new HiveProjectJoinTransposeRule(HiveRelFactories.HIVE_BUILDER);
+
+  private HiveProjectJoinTransposeRule(RelBuilderFactory relBuilderFactory) {
+    super(expr -> true, relBuilderFactory);
+  }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
index 07518df9ec..67ecdec041 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
@@ -20,11 +20,14 @@
 import java.util.Set;
 
 import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.rules.ProjectMergeRule;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
@@ -66,4 +69,22 @@ public boolean matches(RelOptRuleCall call) {
     return super.matches(call);
   }
 
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final Project topProject = call.rel(0);
+    final Project bottomProject = call.rel(1);
+
+    // If the top project does not reference any column of the bottom project,
+    // we can simply remove the bottom project
+    final ImmutableBitSet topRefs =
+        RelOptUtil.InputFinder.bits(topProject.getChildExps(), null);
+    if (topRefs.isEmpty()) {
+      RelBuilder relBuilder = call.builder();
+      relBuilder.push(bottomProject.getInput());
+      relBuilder.project(topProject.getChildExps());
+      call.transformTo(relBuilder.build());
+      return;
+    }
+    super.onMatch(call);
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 8a1a2520c8..93f2b290b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -100,7 +100,6 @@
 import org.apache.calcite.sql.SqlLiteral;
 import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlOperator;
-import org.apache.calcite.sql.SqlSampleSpec;
 import org.apache.calcite.sql.SqlWindow;
 import org.apache.calcite.sql.dialect.HiveSqlDialect;
 import org.apache.calcite.sql.parser.SqlParserPos;
@@ -114,8 +113,6 @@
 import org.apache.calcite.util.Pair;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.ObjectPair;
-import org.apache.hadoop.hive.common.ValidTxnList;
-import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
 import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -191,6 +188,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveIntersectRewriteRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule;
 import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinConstraintsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; @@ -198,6 +196,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectFilterPullUpConstantsRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectJoinTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectOverIntersectRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule; @@ -1660,7 +1659,8 @@ private RowResolver genRowResolver(Operator op, QB qb) { private enum ExtendedCBOProfile { JOIN_REORDERING, - WINDOWING_POSTPROCESSING; + WINDOWING_POSTPROCESSING, + REFERENTIAL_CONSTRAINTS; } /** @@ -1789,9 +1789,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu } // 4.1 Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin. // Don't run this rule if hive is to remove sq_count_check since that rule expects to have project b/w join. - calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider, - HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFF_PROJECT_BTW_JOIN, - HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN); + calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider, + HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFF_PROJECT_BTW_JOIN, + HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN); // 4.2 Apply join order optimizations: reordering MST algorithm // If join optimizations failed because of missing stats, we continue with @@ -2056,6 +2056,11 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv rules.add(new HivePointLookupOptimizerRule.FilterCondition(minNumORClauses)); rules.add(new HivePointLookupOptimizerRule.JoinCondition(minNumORClauses)); } + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_CONSTRAINTS_JOIN) && + profilesCBO.contains(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS)) { + rules.add(HiveProjectJoinTransposeRule.INSTANCE); + rules.add(HiveJoinConstraintsRule.INSTANCE); + } rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN); rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN); rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN); @@ -2115,25 +2120,19 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Projection Pruning"); - // 8. 
Merge, remove and reduce Project if possible - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, - HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE, HiveSortMergeRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Merge Project-Project, Merge Sort-Sort"); - - // 9. Rerun PPD through Project as column pruning would have introduced + // 8. Rerun PPD through Project as column pruning would have introduced // DT above scans; By pushing filter just above TS, Hive can push it into // storage (incase there are filters on non partition cols). This only // matches FIL-PROJ-TS + // Also merge, remove and reduce Project if possible perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID, - HiveProjectFilterPullUpConstantsRule.INSTANCE); + HiveProjectFilterPullUpConstantsRule.INSTANCE, HiveProjectMergeRule.INSTANCE, + ProjectRemoveRule.INSTANCE, HiveSortMergeRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Rerun PPD"); - return basePlan; } @@ -2819,6 +2818,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc } // 4. Build operator + RelOptHiveTable optTable; if (tableType == TableType.DRUID || (tableType == TableType.JDBC && tabMetaData.getProperty("hive.sql.table") != null)) { // Create case sensitive columns list @@ -2874,7 +2874,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false), dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals, null, null); - RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, + optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, partitionCache, colStatsCache, noColsMissingStats); final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), @@ -2884,11 +2884,10 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc || qb.getAliasInsideView().contains(tableAlias.toLowerCase())); tableRel = DruidQuery.create(cluster, cluster.traitSetOf(BindableConvention.INSTANCE), optTable, druidTable, ImmutableList.of(scan), DruidSqlOperatorConverter.getDefaultMap()); - } else if (tableType == TableType.JDBC) { - RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, + } else { + optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, partitionCache, colStatsCache, noColsMissingStats); - final HiveTableScan hts = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, null == tableAlias ? 
tabMetaData.getTableName() : tableAlias, @@ -2926,7 +2925,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc fullyQualifiedTabName.add(tabMetaData.getDbName()); } fullyQualifiedTabName.add(tabMetaData.getTableName()); - RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, + optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, partitionCache, colStatsCache, noColsMissingStats); // Build Hive Table Scan Rel @@ -2937,6 +2936,10 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc || qb.getAliasInsideView().contains(tableAlias.toLowerCase())); } + if (!optTable.getReferentialConstraints().isEmpty()) { + profilesCBO.add(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS); + } + // 6. Add Schema(RR) to RelNode-Schema map ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr, tableRel); diff --git a/ql/src/test/queries/clientpositive/join_constraints_optimization.q b/ql/src/test/queries/clientpositive/join_constraints_optimization.q new file mode 100644 index 0000000000..5981b04a88 --- /dev/null +++ b/ql/src/test/queries/clientpositive/join_constraints_optimization.q @@ -0,0 +1,136 @@ +set hive.strict.checks.cartesian.product=false; + +CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +CREATE TABLE `lineorder_removal_n0`( + `lo_orderkey` bigint, + `lo_linenumber` int, + `lo_custkey` bigint not null disable rely, + `lo_partkey` bigint not null disable rely, + `lo_suppkey` bigint not null disable rely, + `lo_orderdate` bigint, + `lo_ordpriority` string, + `lo_shippriority` string, + `lo_quantity` double, + `lo_extendedprice` double, + `lo_ordtotalprice` double, + `lo_discount` double, + `lo_revenue` double, + `lo_supplycost` double, + `lo_tax` double, + `lo_commitdate` bigint, + `lo_shipmode` string, + primary key (`lo_orderkey`) disable rely, + constraint fk1 foreign key (`lo_custkey`) references `customer_removal_n0`(`c_custkey`) disable rely, + constraint fk2 foreign key (`lo_orderdate`) references `dates_removal_n0`(`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +-- CAN BE REMOVED AND DOES NOT NEED FILTER ON JOIN COLUMN +-- AS COLUMN IS ALREADY NOT NULLABLE +EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`; + +-- CAN BE REMOVED AND INTRODUCES A FILTER ON JOIN COLUMN +EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`; + +-- REMOVES THE JOIN +EXPLAIN +SELECT 
`lo_linenumber`
+FROM `lineorder_removal_n0`
+LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
+
+-- TRANSFORMS THE JOIN
+EXPLAIN
+SELECT `lo_linenumber`, `c_region`
+FROM `lineorder_removal_n0`
+LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
+
+-- NOT TRANSFORMED INTO INNER JOIN SINCE JOIN COLUMN IS NULLABLE
+EXPLAIN
+SELECT `lo_linenumber`
+FROM `lineorder_removal_n0`
+LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
+
+-- REMOVES BOTH JOINS
+EXPLAIN
+SELECT `lo_linenumber`
+FROM `lineorder_removal_n0`
+JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`
+JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
+
+-- REMOVES BOTH JOINS
+EXPLAIN
+SELECT `lo_linenumber`
+FROM `lineorder_removal_n0`
+LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`
+JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
+
+-- REMOVES INNER AND DOES NOT TRANSFORM OUTER
+EXPLAIN
+SELECT `lo_linenumber` FROM
+(SELECT *
+FROM `lineorder_removal_n0`
+JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`) subq
+LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
+
+-- REMOVES FIRST OUTER AND DOES NOT TRANSFORM SECOND OUTER
+EXPLAIN
+SELECT `lo_linenumber`
+FROM `lineorder_removal_n0`
+LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`
+LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`;
+
+-- TRANSFORMS NEITHER
+EXPLAIN
+SELECT `lo_linenumber`
+FROM `lineorder_removal_n0`
+LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey`
+LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
+
+-- SWAP AND REMOVE
+EXPLAIN
+SELECT `lo_linenumber`, `c_custkey`
+FROM `lineorder_removal_n0`
+JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q
index ee4844277e..18b9f7d418 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q
@@ -14,7 +14,7 @@ create table emps_n3 (
   commission int)
 stored as orc TBLPROPERTIES ('transactional'='true');
 insert into emps_n3 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500),
-  (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250);
+  (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250);
 analyze table emps_n3 compute statistics for columns;
 
 create table depts_n2 (
@@ -29,14 +29,14 @@ create table dependents_n2 (
   empid int,
   name varchar(256))
 stored as orc TBLPROPERTIES ('transactional'='true');
-insert into dependents_n2 values (10, 'Michael'), (10, 'Jane');
+insert into dependents_n2 values (10, 'Michael'), (20, 'Jane');
 analyze table dependents_n2 compute statistics for columns;
 
 create table locations_n2 (
   locationid int,
   name varchar(256))
 stored as orc TBLPROPERTIES ('transactional'='true');
-insert into locations_n2 values (10, 'San Francisco'), (10, 'San Diego');
+insert into locations_n2 values (10, 'San Francisco'), (20, 'San Diego');
 analyze table locations_n2 compute statistics for columns;
 
 alter table emps_n3 add constraint pk1 primary key (empid) disable novalidate rely;
diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_3.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_3.q
index 4aadd5fb0a..0823f59394 100644 ---
a/ql/src/test/queries/clientpositive/materialized_view_rewrite_3.q +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_3.q @@ -14,7 +14,7 @@ create table emps_n9 ( commission int) stored as orc TBLPROPERTIES ('transactional'='true'); insert into emps_n9 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250); + (150, 10, 'Sebastian', 7000, null), (120, 10, 'Theodore', 10000, 250); analyze table emps_n9 compute statistics for columns; create table depts_n7 ( @@ -29,14 +29,14 @@ create table dependents_n5 ( empid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into dependents_n5 values (10, 'Michael'), (10, 'Jane'); +insert into dependents_n5 values (10, 'Michael'), (20, 'Jane'); analyze table dependents_n5 compute statistics for columns; create table locations_n5 ( locationid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into locations_n5 values (10, 'San Francisco'), (10, 'San Diego'); +insert into locations_n5 values (10, 'San Francisco'), (20, 'San Diego'); analyze table locations_n5 compute statistics for columns; alter table emps_n9 add constraint pk1 primary key (empid) disable novalidate rely; diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_4.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_4.q index dc20b68ba9..6724cec771 100644 --- a/ql/src/test/queries/clientpositive/materialized_view_rewrite_4.q +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_4.q @@ -14,7 +14,7 @@ create table emps_n5 ( commission int) stored as orc TBLPROPERTIES ('transactional'='true'); insert into emps_n5 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250); analyze table emps_n5 compute statistics for columns; create table depts_n4 ( @@ -29,14 +29,14 @@ create table dependents_n3 ( empid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into dependents_n3 values (10, 'Michael'), (10, 'Jane'); +insert into dependents_n3 values (10, 'Michael'), (20, 'Jane'); analyze table dependents_n3 compute statistics for columns; create table locations_n3 ( locationid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into locations_n3 values (10, 'San Francisco'), (10, 'San Diego'); +insert into locations_n3 values (10, 'San Francisco'), (20, 'San Diego'); analyze table locations_n3 compute statistics for columns; alter table emps_n5 add constraint pk1 primary key (empid) disable novalidate rely; @@ -78,7 +78,8 @@ from emps_n5 group by name, salary; drop materialized view mv1_n3; --- EXAMPLE 25 +-- EXAMPLE 25: REWRITING NOT TRIGGERED WHEN JOIN CONSTRAINTS +-- OPTIMIZATION IS ENABLED create materialized view mv1_n3 as select empid, emps_n5.deptno, count(*) as c, sum(empid) as s from emps_n5 join depts_n4 using (deptno) diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_5.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_5.q index 0e4fdf49ac..d87928c073 100644 --- a/ql/src/test/queries/clientpositive/materialized_view_rewrite_5.q +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_5.q @@ -14,7 +14,7 @@ create table emps_n2 ( commission int) stored as 
orc TBLPROPERTIES ('transactional'='true'); insert into emps_n2 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250); analyze table emps_n2 compute statistics for columns; create table depts_n1 ( @@ -29,14 +29,14 @@ create table dependents_n1 ( empid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into dependents_n1 values (10, 'Michael'), (10, 'Jane'); +insert into dependents_n1 values (10, 'Michael'), (20, 'Jane'); analyze table dependents_n1 compute statistics for columns; create table locations_n1 ( locationid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into locations_n1 values (10, 'San Francisco'), (10, 'San Diego'); +insert into locations_n1 values (10, 'San Francisco'), (20, 'San Diego'); analyze table locations_n1 compute statistics for columns; alter table emps_n2 add constraint pk1 primary key (empid) disable novalidate rely; diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_6.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_6.q index 4f05f76330..23fc3c14ce 100644 --- a/ql/src/test/queries/clientpositive/materialized_view_rewrite_6.q +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_6.q @@ -14,7 +14,7 @@ create table emps ( commission int) stored as orc TBLPROPERTIES ('transactional'='true'); insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250); analyze table emps compute statistics for columns; create table depts ( @@ -29,14 +29,14 @@ create table dependents ( empid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into dependents values (10, 'Michael'), (10, 'Jane'); +insert into dependents values (10, 'Michael'), (20, 'Jane'); analyze table dependents compute statistics for columns; create table locations ( locationid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into locations values (10, 'San Francisco'), (10, 'San Diego'); +insert into locations values (10, 'San Francisco'), (20, 'San Diego'); analyze table locations compute statistics for columns; alter table emps add constraint pk1 primary key (empid) disable novalidate rely; diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_7.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_7.q index 59ed575756..3d1cedc4f5 100644 --- a/ql/src/test/queries/clientpositive/materialized_view_rewrite_7.q +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_7.q @@ -29,14 +29,14 @@ create table dependents_n4 ( empid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into dependents_n4 values (10, 'Michael'), (10, 'Jane'); +insert into dependents_n4 values (10, 'Michael'), (20, 'Jane'); analyze table dependents_n4 compute statistics for columns; create table locations_n4 ( locationid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into locations_n4 values (10, 'San Francisco'), (10, 'San Diego'); +insert into locations_n4 values (10, 'San Francisco'), (20, 
'San Diego'); analyze table locations_n4 compute statistics for columns; alter table emps_n8 add constraint pk1 primary key (empid) disable novalidate rely; diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_no_join_opt.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_no_join_opt.q new file mode 100644 index 0000000000..8de9c7087a --- /dev/null +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_no_join_opt.q @@ -0,0 +1,132 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.optimize.constraints.join=false; + +create table emps_n30 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps_n30 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250); +analyze table emps_n30 compute statistics for columns; + +create table depts_n20 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts_n20 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts_n20 compute statistics for columns; + +create table dependents_n20 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents_n20 values (10, 'Michael'), (20, 'Jane'); +analyze table dependents_n20 compute statistics for columns; + +create table locations_n20 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations_n20 values (10, 'San Francisco'), (20, 'San Diego'); +analyze table locations_n20 compute statistics for columns; + +alter table emps_n30 add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts_n20 add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents_n20 add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations_n20 add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps_n30 add constraint fk1 foreign key (deptno) references depts_n20(deptno) disable novalidate rely; +alter table depts_n20 add constraint fk2 foreign key (locationid) references locations_n20(locationid) disable novalidate rely; + +-- EXAMPLE 1 +create materialized view mv1_n20 as +select deptno, name, salary, commission +from emps_n30; +analyze table mv1_n20 compute statistics for columns; + +explain +select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno); + +select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno); + +drop materialized view mv1_n20; + +-- EXAMPLE 2 +create materialized view mv1_n20 as +select empid, emps_n30.deptno, count(*) as c, sum(empid) as s +from emps_n30 join depts_n20 using (deptno) +group by empid, emps_n30.deptno; +analyze table mv1_n20 compute statistics for columns; + +explain +select depts_n20.deptno, count(*) as c, sum(empid) as s +from emps_n30 join depts_n20 using (deptno) +group by depts_n20.deptno; + +select depts_n20.deptno, count(*) as c, sum(empid) as s +from emps_n30 join depts_n20 using (deptno) +group by depts_n20.deptno; + +drop materialized view 
mv1_n20; + +-- EXAMPLE 3 +create materialized view mv1_n20 as +select dependents_n20.empid, emps_n30.deptno, sum(salary) as s +from emps_n30 +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +group by dependents_n20.empid, emps_n30.deptno; +analyze table mv1_n20 compute statistics for columns; + +explain +select dependents_n20.empid, sum(salary) as s +from emps_n30 +join depts_n20 on (emps_n30.deptno = depts_n20.deptno) +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +group by dependents_n20.empid; + +select dependents_n20.empid, sum(salary) as s +from emps_n30 +join depts_n20 on (emps_n30.deptno = depts_n20.deptno) +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +group by dependents_n20.empid; + +drop materialized view mv1_n20; + +-- EXAMPLE 4 +create materialized view mv1_n20 as +select emps_n30.empid, emps_n30.deptno, emps_n30.name as name1, emps_n30.salary, emps_n30.commission, dependents_n20.name as name2 +from emps_n30 join dependents_n20 on (emps_n30.empid = dependents_n20.empid); +analyze table mv1_n20 compute statistics for columns; + +explain +select emps_n30.empid, dependents_n20.empid, emps_n30.deptno +from emps_n30 +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +join depts_n20 a on (emps_n30.deptno=a.deptno) +where emps_n30.name = 'Bill'; + +select emps_n30.empid, dependents_n20.empid, emps_n30.deptno +from emps_n30 +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +join depts_n20 a on (emps_n30.deptno=a.deptno) +where emps_n30.name = 'Bill'; + +drop materialized view mv1_n20; + + + + + diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_no_join_opt_2.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_no_join_opt_2.q new file mode 100644 index 0000000000..a1372301fe --- /dev/null +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_no_join_opt_2.q @@ -0,0 +1,144 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.optimize.constraints.join=false; + +create table emps_n30 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps_n30 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250); +analyze table emps_n30 compute statistics for columns; + +create table depts_n20 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts_n20 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts_n20 compute statistics for columns; + +create table dependents_n20 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents_n20 values (10, 'Michael'), (20, 'Jane'); +analyze table dependents_n20 compute statistics for columns; + +create table locations_n20 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations_n20 values (10, 'San Francisco'), (20, 'San Diego'); +analyze table locations_n20 compute statistics for columns; + +alter table emps_n30 add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts_n20 add constraint pk2 primary key 
(deptno) disable novalidate rely; +alter table dependents_n20 add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations_n20 add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps_n30 add constraint fk1 foreign key (deptno) references depts_n20(deptno) disable novalidate rely; +alter table depts_n20 add constraint fk2 foreign key (locationid) references locations_n20(locationid) disable novalidate rely; + +-- EXAMPLE 1 +create materialized view mv1_part_n2 partitioned on (deptno) as +select * from emps_n30 where empid < 150; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select * +from (select * from emps_n30 where empid < 120) t +join depts_n20 using (deptno); + +select * +from (select * from emps_n30 where empid < 120) t +join depts_n20 using (deptno); + +drop materialized view mv1_part_n2; + +-- EXAMPLE 2 +create materialized view mv1_part_n2 partitioned on (deptno) as +select deptno, name, salary, commission +from emps_n30; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno); + +select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno); + +drop materialized view mv1_part_n2; + +-- EXAMPLE 4 +create materialized view mv1_part_n2 partitioned on (deptno) as +select * from emps_n30 where empid < 200; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select * from emps_n30 where empid > 120 +union all select * from emps_n30 where empid < 150; + +select * from emps_n30 where empid > 120 +union all select * from emps_n30 where empid < 150; + +drop materialized view mv1_part_n2; + +-- EXAMPLE 5 +create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 group by name, salary; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select name, salary from emps_n30 group by name, salary; + +select name, salary from emps_n30 group by name, salary; + +drop materialized view mv1_part_n2; + +-- EXAMPLE 6 +create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 group by name, salary; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select name from emps_n30 group by name; + +select name from emps_n30 group by name; + +drop materialized view mv1_part_n2; + +-- EXAMPLE 7 +create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 where deptno = 10 group by name, salary; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select name from emps_n30 where deptno = 10 group by name; + +select name from emps_n30 where deptno = 10 group by name; + +drop materialized view mv1_part_n2; + +-- EXAMPLE 9 +create materialized view mv1_part_n2 partitioned on (name) as +select name, salary, count(*) as c, sum(empid) as s +from emps_n30 group by name, salary; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select name from emps_n30 group by name; + +select name from emps_n30 group by name; + +drop materialized view mv1_part_n2; diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_1.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_1.q index 5a2e74c8a0..e6980c07f1 100644 --- a/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_1.q +++ 
b/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_1.q @@ -15,7 +15,7 @@ create table emps_n30 ( commission int) stored as orc TBLPROPERTIES ('transactional'='true'); insert into emps_n30 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250); analyze table emps_n30 compute statistics for columns; create table depts_n20 ( @@ -30,14 +30,14 @@ create table dependents_n20 ( empid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into dependents_n20 values (10, 'Michael'), (10, 'Jane'); +insert into dependents_n20 values (10, 'Michael'), (20, 'Jane'); analyze table dependents_n20 compute statistics for columns; create table locations_n20 ( locationid int, name varchar(256)) stored as orc TBLPROPERTIES ('transactional'='true'); -insert into locations_n20 values (10, 'San Francisco'), (10, 'San Diego'); +insert into locations_n20 values (10, 'San Francisco'), (20, 'San Diego'); analyze table locations_n20 compute statistics for columns; alter table emps_n30 add constraint pk1 primary key (empid) disable novalidate rely; diff --git a/ql/src/test/results/clientpositive/ambiguitycheck.q.out b/ql/src/test/results/clientpositive/ambiguitycheck.q.out index 80c9582fec..e19f4b6312 100644 --- a/ql/src/test/results/clientpositive/ambiguitycheck.q.out +++ b/ql/src/test/results/clientpositive/ambiguitycheck.q.out @@ -705,7 +705,7 @@ PREHOOK: query: explain extended select int(1.2) from src limit 1 PREHOOK: type: QUERY POSTHOOK: query: explain extended select int(1.2) from src limit 1 POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(1.2 AS INTEGER) AS `_o__c0` +OPTIMIZED SQL: SELECT CAST(1.2 AS INTEGER) AS `$f0` FROM `default`.`src` LIMIT 1 STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out b/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out index 7813aac294..22305f50d2 100644 --- a/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out +++ b/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out @@ -124,7 +124,7 @@ STAGE PLANS: alias: default.cmv_mat_view2_n4 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 3 (type: int), c (type: decimal(10,2)) + expressions: a (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE ListSink diff --git a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out index 806262d72e..85ccd44e92 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out @@ -164,12 +164,12 @@ STAGE PLANS: TableScan alias: cmv_mat_view2_n0 properties: - druid.fieldNames vc,c + druid.fieldNames a,c druid.fieldTypes int,double - druid.query.json {"queryType":"scan","dataSource":"default.cmv_mat_view2_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"3","outputType":"LONG"}],"columns":["vc","c"],"resultFormat":"compactedList"} + druid.query.json 
{"queryType":"scan","dataSource":"default.cmv_mat_view2_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"columns":["a","c"],"resultFormat":"compactedList"} druid.query.type scan Select Operator - expressions: vc (type: int), c (type: double) + expressions: a (type: int), c (type: double) outputColumnNames: _col0, _col1 ListSink diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out index bd8e215c22..5289fd85e1 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out @@ -361,7 +361,7 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select * from list_bucketing_static_part_n4 where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) AS `value`, CAST('2008-04-08' AS STRING) AS `ds`, CAST('11' AS STRING) AS `hr` +OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0`, CAST('val_484' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2`, CAST('11' AS STRING) AS `$f3` FROM `default`.`list_bucketing_static_part_n4` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out index 520d48e3d9..7881bbaf41 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out @@ -804,7 +804,7 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select * from list_bucketing_static_part_n2 where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) AS `value`, CAST('2008-04-08' AS STRING) AS `ds`, CAST('11' AS STRING) AS `hr` +OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0`, CAST('val_484' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2`, CAST('11' AS STRING) AS `$f3` FROM `default`.`list_bucketing_static_part_n2` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out index fbd4fde1bd..acebfe8184 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out @@ -804,7 +804,7 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select * from list_bucketing_static_part_n0 where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) AS `value`, CAST('2008-04-08' AS STRING) AS `ds`, CAST('11' AS STRING) AS `hr` +OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0`, CAST('val_484' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2`, CAST('11' AS STRING) AS `$f3` FROM `default`.`list_bucketing_static_part_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out index e324cab738..e8cc54ca3e 100644 --- 
a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out @@ -80,7 +80,7 @@ PREHOOK: query: explain extended SELECT key FROM fact_daily WHERE ( ds='1' and h PREHOOK: type: QUERY POSTHOOK: query: explain extended SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484') POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key` +OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0` FROM `default`.`fact_daily` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: @@ -203,7 +203,7 @@ PREHOOK: query: explain extended SELECT key,value FROM fact_daily WHERE ( ds='1' PREHOOK: type: QUERY POSTHOOK: query: explain extended SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238') POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST('238' AS STRING) AS `key`, CAST('val_238' AS STRING) AS `value` +OPTIMIZED SQL: SELECT CAST('238' AS STRING) AS `$f0`, CAST('val_238' AS STRING) AS `$f1` FROM `default`.`fact_daily` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '238' AND `value` = 'val_238' STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index ec1e54060c..534c924ba1 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -253,7 +253,7 @@ PREHOOK: query: explain extended SELECT key FROM fact_daily_n2 WHERE ds='1' and PREHOOK: type: QUERY POSTHOOK: query: explain extended SELECT key FROM fact_daily_n2 WHERE ds='1' and hr='4' and key= '406' POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST('406' AS STRING) AS `key` +OPTIMIZED SQL: SELECT CAST('406' AS STRING) AS `$f0` FROM `default`.`fact_daily_n2` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '406' STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out index 889f23c6da..32ffe076fa 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out @@ -343,7 +343,7 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='2' and (key='484' and value='val_484') POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) AS `value`, CAST('1' AS STRING) AS `ds`, CAST('2' AS STRING) AS `hr` +OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0`, CAST('val_484' AS STRING) AS `$f1`, CAST('1' AS STRING) AS `$f2`, CAST('2' AS STRING) AS `$f3` FROM `default`.`fact_daily_n3` WHERE `ds` = '1' AND `hr` = '2' AND `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: @@ -468,7 +468,7 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='3' and (key='327' and value='val_327') POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST('327' AS STRING) AS `key`, CAST('val_327' AS STRING) AS `value`, CAST('1' AS STRING) AS `ds`, CAST('3' AS STRING) AS `hr` +OPTIMIZED SQL: SELECT CAST('327' AS STRING) AS `$f0`, CAST('val_327' AS STRING) AS `$f1`, CAST('1' AS STRING) AS `$f2`, CAST('3' AS STRING) AS `$f3` FROM `default`.`fact_daily_n3` WHERE `ds` = '1' AND `hr` = '3' AND `key` = '327' AND 
`value` = 'val_327' STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out index dcff8a5037..77b56d3f6d 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out @@ -131,7 +131,7 @@ PREHOOK: query: explain extended SELECT x FROM fact_daily_n4 WHERE ds='1' and x= PREHOOK: type: QUERY POSTHOOK: query: explain extended SELECT x FROM fact_daily_n4 WHERE ds='1' and x=484 POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `x` +OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `$f0` FROM `default`.`fact_daily_n4` WHERE `ds` = '1' AND `x` = 484 STAGE DEPENDENCIES: @@ -251,7 +251,7 @@ PREHOOK: query: explain extended SELECT x FROM fact_daily_n4 WHERE ds='1' and x= PREHOOK: type: QUERY POSTHOOK: query: explain extended SELECT x FROM fact_daily_n4 WHERE ds='1' and x=495 POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(495 AS INTEGER) AS `x` +OPTIMIZED SQL: SELECT CAST(495 AS INTEGER) AS `$f0` FROM `default`.`fact_daily_n4` WHERE `ds` = '1' AND `x` = 495 STAGE DEPENDENCIES: @@ -371,7 +371,7 @@ PREHOOK: query: explain extended SELECT x FROM fact_daily_n4 WHERE ds='1' and x= PREHOOK: type: QUERY POSTHOOK: query: explain extended SELECT x FROM fact_daily_n4 WHERE ds='1' and x=1 POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `x` +OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `$f0` FROM `default`.`fact_daily_n4` WHERE `ds` = '1' AND `x` = 1 STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out index 268051e2ac..022af858f6 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out @@ -134,7 +134,7 @@ PREHOOK: query: explain extended select x from (select x from fact_daily_n5 wher PREHOOK: type: QUERY POSTHOOK: query: explain extended select x from (select x from fact_daily_n5 where ds = '1') subq where x = 484 POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `x` +OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `$f0` FROM `default`.`fact_daily_n5` WHERE `ds` = '1' AND `x` = 484 STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out index 3951b71227..a12ac3ad2d 100644 --- a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out @@ -30,7 +30,7 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM acidTblDefault WHERE a = 1 POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `a` +OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `$f0` FROM `default`.`acidtbldefault` WHERE `a` = 1 STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out b/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out index 55442ad046..d80b911495 100644 --- a/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out @@ -410,7 +410,7 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select * from srcbucket_pruned where key = 1 and ds='2008-04-08' and value='One' POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) 
AS `key`, CAST('One' AS STRING) AS `value`, CAST('2008-04-08' AS STRING) AS `ds` +OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `$f0`, CAST('One' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2` FROM `default`.`srcbucket_pruned` WHERE `key` = 1 AND `ds` = '2008-04-08' AND `value` = 'One' STAGE DEPENDENCIES: @@ -475,7 +475,7 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select * from srcbucket_pruned where value='One' and key = 1 and ds='2008-04-08' POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `key`, CAST('One' AS STRING) AS `value`, CAST('2008-04-08' AS STRING) AS `ds` +OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `$f0`, CAST('One' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2` FROM `default`.`srcbucket_pruned` WHERE `value` = 'One' AND `key` = 1 AND `ds` = '2008-04-08' STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out index 6831fb2573..428ded210d 100644 --- a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out +++ b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out @@ -43,7 +43,7 @@ PREHOOK: query: explain extended select current_timestamp() from alltypesorc PREHOOK: type: QUERY POSTHOOK: query: explain extended select current_timestamp() from alltypesorc POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CURRENT_TIMESTAMP() AS `_o__c0` +OPTIMIZED SQL: SELECT CURRENT_TIMESTAMP() AS `$f0` FROM `default`.`alltypesorc` STAGE DEPENDENCIES: Stage-0 is a root stage diff --git a/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out new file mode 100644 index 0000000000..8cd267639f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out @@ -0,0 +1,746 @@ +PREHOOK: query: CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_removal_n0 +PREHOOK: query: CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_date` 
string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dates_removal_n0 +PREHOOK: query: CREATE TABLE `lineorder_removal_n0`( + `lo_orderkey` bigint, + `lo_linenumber` int, + `lo_custkey` bigint not null disable rely, + `lo_partkey` bigint not null disable rely, + `lo_suppkey` bigint not null disable rely, + `lo_orderdate` bigint, + `lo_ordpriority` string, + `lo_shippriority` string, + `lo_quantity` double, + `lo_extendedprice` double, + `lo_ordtotalprice` double, + `lo_discount` double, + `lo_revenue` double, + `lo_supplycost` double, + `lo_tax` double, + `lo_commitdate` bigint, + `lo_shipmode` string, + primary key (`lo_orderkey`) disable rely, + constraint fk1 foreign key (`lo_custkey`) references `customer_removal_n0`(`c_custkey`) disable rely, + constraint fk2 foreign key (`lo_orderdate`) references `dates_removal_n0`(`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineorder_removal_n0 +POSTHOOK: query: CREATE TABLE `lineorder_removal_n0`( + `lo_orderkey` bigint, + `lo_linenumber` int, + `lo_custkey` bigint not null disable rely, + `lo_partkey` bigint not null disable rely, + `lo_suppkey` bigint not null disable rely, + `lo_orderdate` bigint, + `lo_ordpriority` string, + `lo_shippriority` string, + `lo_quantity` double, + `lo_extendedprice` double, + `lo_ordtotalprice` double, + `lo_discount` double, + `lo_revenue` double, + `lo_supplycost` double, + `lo_tax` double, + `lo_commitdate` bigint, + `lo_shipmode` string, + primary key (`lo_orderkey`) disable rely, + constraint fk1 foreign key (`lo_custkey`) references `customer_removal_n0`(`c_custkey`) disable rely, + constraint fk2 foreign key (`lo_orderdate`) references `dates_removal_n0`(`d_datekey`) disable rely) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineorder_removal_n0 +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: lineorder_removal_n0 + Select Operator + expressions: lo_linenumber (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: 
+ TableScan + alias: lineorder_removal_n0 + filterExpr: lo_orderdate is not null (type: boolean) + Filter Operator + predicate: lo_orderdate is not null (type: boolean) + Select Operator + expressions: lo_linenumber (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: lineorder_removal_n0 + Select Operator + expressions: lo_linenumber (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber`, `c_region` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber`, `c_region` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineorder_removal_n0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lo_linenumber (type: int), lo_custkey (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: customer_removal_n0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_custkey (type: bigint), c_region (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineorder_removal_n0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lo_linenumber (type: int), lo_orderdate (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: lineorder_removal_n0 + filterExpr: lo_orderdate is not null (type: boolean) + Filter Operator + predicate: lo_orderdate is not null (type: boolean) + Select Operator + expressions: lo_linenumber (type: int) + 
outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: lineorder_removal_n0 + filterExpr: lo_orderdate is not null (type: boolean) + Filter Operator + predicate: lo_orderdate is not null (type: boolean) + Select Operator + expressions: lo_linenumber (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` FROM +(SELECT * +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`) subq +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` FROM +(SELECT * +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`) subq +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineorder_removal_n0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lo_linenumber (type: int), lo_orderdate (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineorder_removal_n0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lo_linenumber (type: int), lo_orderdate (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber` +FROM `lineorder_removal_n0` +LEFT OUTER JOIN `dates_removal_n0` ON `lo_orderdate` = `d_datekey` +LEFT OUTER JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + 
Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineorder_removal_n0 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lo_linenumber (type: int), lo_custkey (type: bigint), lo_orderdate (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: bigint) + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: customer_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT `lo_linenumber`, `c_custkey` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT `lo_linenumber`, `c_custkey` +FROM `lineorder_removal_n0` +JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE 
PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: lineorder_removal_n0 + Select Operator + expressions: lo_linenumber (type: int), lo_custkey (type: bigint) + outputColumnNames: _col0, _col1 + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out index 71adebb2ac..686f307078 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out @@ -123,7 +123,7 @@ STAGE PLANS: TableScan alias: default.cmv_mat_view2_n4 Select Operator - expressions: 3 (type: int), c (type: decimal(10,2)) + expressions: a (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 ListSink diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out index ce1c281bea..22e9a69f41 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out @@ -123,7 +123,7 @@ STAGE PLANS: TableScan alias: default.cmv_mat_view2 Select Operator - expressions: 3 (type: int), c (type: decimal(10,2)) + expressions: a (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 ListSink diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out index 98f74379f6..0bef9398fe 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out @@ -159,7 +159,7 @@ STAGE PLANS: TableScan alias: db2.cmv_mat_view2_n2 Select Operator - expressions: 3 (type: int), c (type: decimal(10,2)) + expressions: a (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 ListSink diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out index 4d8fa52aa9..99cdadf938 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out @@ -19,12 +19,12 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@emps_n3 PREHOOK: query: insert into emps_n3 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@emps_n3 POSTHOOK: query: insert into emps_n3 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@emps_n3 @@ -94,11 +94,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: 
CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dependents_n2 -PREHOOK: query: insert into dependents_n2 values (10, 'Michael'), (10, 'Jane') +PREHOOK: query: insert into dependents_n2 values (10, 'Michael'), (20, 'Jane') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dependents_n2 -POSTHOOK: query: insert into dependents_n2 values (10, 'Michael'), (10, 'Jane') +POSTHOOK: query: insert into dependents_n2 values (10, 'Michael'), (20, 'Jane') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@dependents_n2 @@ -128,11 +128,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@locations_n2 -PREHOOK: query: insert into locations_n2 values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: query: insert into locations_n2 values (10, 'San Francisco'), (20, 'San Diego') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@locations_n2 -POSTHOOK: query: insert into locations_n2 values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: query: insert into locations_n2 values (10, 'San Francisco'), (20, 'San Diego') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@locations_n2 @@ -224,16 +224,16 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((empid < 120) and deptno is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: empid (type: int), deptno (type: int), name (type: varchar(256)), salary (type: float), commission (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: varchar(256)), _col3 (type: float), _col4 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -264,14 +264,14 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: int), _col0 (type: int), _col2 (type: varchar(256)), _col3 (type: float), _col4 (type: int), _col6 (type: varchar(256)), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -300,7 +300,6 @@ POSTHOOK: Input: default@emps_n3 POSTHOOK: Input: default@mv1_n2 #### A masked pattern was here #### 10 100 Bill 10000.0 1000 Sales 10 -10 110 Bill 10000.0 250 Sales 10 10 110 Theodore 10000.0 250 Sales 10 PREHOOK: query: drop materialized view mv1_n2 PREHOOK: type: DROP_MATERIALIZED_VIEW @@ -345,82 +344,22 @@ from emps_n3 join depts_n2 using (deptno) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: default.mv1_n2 - filterExpr: deptno is not null (type: boolean) - Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: deptno is not null (type: boolean) - Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptno (type: int), name (type: varchar(256)), salary (type: float), commission (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: varchar(256)), _col2 (type: float), _col3 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: depts_n2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptno (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: varchar(256)), _col2 (type: float), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: default.mv1_n2 + filterExpr: deptno is not null (type: boolean) + Filter Operator + predicate: deptno is not null (type: boolean) + Select Operator + expressions: name (type: varchar(256)), salary (type: float), commission 
(type: int) + outputColumnNames: _col0, _col1, _col2 + ListSink PREHOOK: query: select emps_n3.name, emps_n3.salary, emps_n3.commission from emps_n3 @@ -634,8 +573,8 @@ POSTHOOK: Input: default@emps_n3 POSTHOOK: Input: default@mv1_n2 #### A masked pattern was here #### 100 10 Bill 10000.0 1000 -110 10 Bill 10000.0 250 110 10 Theodore 10000.0 250 +120 10 Bill 10000.0 250 150 10 Sebastian 7000.0 NULL 200 20 Eric 8000.0 500 PREHOOK: query: drop materialized view mv1_n2 @@ -699,7 +638,7 @@ POSTHOOK: Input: default@emps_n3 #### A masked pattern was here #### 100 10 110 10 -110 10 +120 10 150 10 200 20 PREHOOK: query: drop materialized view mv1_n2 @@ -762,8 +701,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@emps_n3 #### A masked pattern was here #### 100 Bill -110 Bill 110 Theodore +120 Bill 150 Sebastian 200 Eric PREHOOK: query: drop materialized view mv1_n2 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_2.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_2.q.out index 8e54abe61d..6b8d66f5cd 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_2.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_2.q.out @@ -544,7 +544,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -559,20 +560,52 @@ STAGE PLANS: Select Operator expressions: deptno (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: depts_n0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_3.q.out index d7536e4087..a2ac499c74 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_3.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_3.q.out @@ -19,12 +19,12 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@emps_n9 PREHOOK: query: insert into emps_n9 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (120, 10, 'Theodore', 10000, 250) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@emps_n9 POSTHOOK: query: insert into emps_n9 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (120, 10, 'Theodore', 10000, 250) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@emps_n9 @@ -94,11 +94,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dependents_n5 -PREHOOK: query: insert into dependents_n5 values (10, 'Michael'), (10, 'Jane') +PREHOOK: query: insert into dependents_n5 values (10, 'Michael'), (20, 'Jane') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dependents_n5 -POSTHOOK: query: insert into dependents_n5 values (10, 'Michael'), (10, 'Jane') +POSTHOOK: query: insert into dependents_n5 values (10, 'Michael'), (20, 'Jane') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@dependents_n5 @@ -128,11 +128,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@locations_n5 -PREHOOK: query: insert into locations_n5 values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: query: insert into locations_n5 values (10, 'San Francisco'), (20, 'San Diego') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@locations_n5 -POSTHOOK: query: insert into locations_n5 values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: query: insert into locations_n5 values (10, 'San Francisco'), (20, 'San Diego') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@locations_n5 @@ -290,11 +290,11 @@ STAGE PLANS: Processor Tree: TableScan alias: default.mv1_n5 - filterExpr: (UDFToInteger(_c0) > 1) (type: boolean) + filterExpr: (UDFToInteger(empid) > 1) (type: boolean) Filter Operator - predicate: (UDFToInteger(_c0) > 1) (type: boolean) + predicate: (UDFToInteger(empid) > 1) (type: boolean) Select Operator - expressions: UDFToInteger(_c0) (type: int) + expressions: UDFToInteger(empid) (type: int) outputColumnNames: _col0 ListSink @@ -313,7 +313,7 @@ POSTHOOK: Input: default@emps_n9 POSTHOOK: Input: default@mv1_n5 #### A masked pattern was here #### 100 -110 +120 150 200 PREHOOK: query: drop materialized view mv1_n5 @@ -368,9 +368,9 @@ STAGE PLANS: Processor Tree: TableScan 
alias: default.mv1_n5 - filterExpr: (UDFToInteger(_c0) = 1) (type: boolean) + filterExpr: (UDFToInteger(empid) = 1) (type: boolean) Filter Operator - predicate: (UDFToInteger(_c0) = 1) (type: boolean) + predicate: (UDFToInteger(empid) = 1) (type: boolean) Select Operator expressions: 1 (type: int) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out index 3fd4c59ee6..6f675737ca 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out @@ -19,12 +19,12 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@emps_n5 PREHOOK: query: insert into emps_n5 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@emps_n5 POSTHOOK: query: insert into emps_n5 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@emps_n5 @@ -94,11 +94,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dependents_n3 -PREHOOK: query: insert into dependents_n3 values (10, 'Michael'), (10, 'Jane') +PREHOOK: query: insert into dependents_n3 values (10, 'Michael'), (20, 'Jane') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dependents_n3 -POSTHOOK: query: insert into dependents_n3 values (10, 'Michael'), (10, 'Jane') +POSTHOOK: query: insert into dependents_n3 values (10, 'Michael'), (20, 'Jane') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@dependents_n3 @@ -128,11 +128,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@locations_n3 -PREHOOK: query: insert into locations_n3 values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: query: insert into locations_n3 values (10, 'San Francisco'), (20, 'San Diego') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@locations_n3 -POSTHOOK: query: insert into locations_n3 values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: query: insert into locations_n3 values (10, 'San Francisco'), (20, 'San Diego') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@locations_n3 @@ -274,7 +274,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@emps_n5 POSTHOOK: Input: default@mv1_n3 #### A masked pattern was here #### -Bill 2 210 +Bill 2 220 Eric 1 200 Sebastian 1 150 Theodore 1 110 @@ -345,7 +345,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@emps_n5 POSTHOOK: Input: default@mv1_n3 #### A masked pattern was here #### -10000.0 Bill 210 2 +10000.0 Bill 220 2 10000.0 Theodore 110 1 7000.0 Sebastian 150 1 8000.0 Eric 200 1 @@ -411,14 
+411,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.mv1_n3 - Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: deptno (type: int), c (type: bigint), s (type: bigint) - outputColumnNames: deptno, c, s - Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + expressions: c (type: bigint), s (type: bigint), deptno (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: $sum0(c), sum(s) - keys: deptno (type: int) + aggregations: $sum0(_col0), sum(_col1) + keys: _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE @@ -469,7 +469,7 @@ POSTHOOK: Input: default@depts_n4 POSTHOOK: Input: default@emps_n5 POSTHOOK: Input: default@mv1_n3 #### A masked pattern was here #### -10 4 470 +10 4 480 20 1 200 PREHOOK: query: drop materialized view mv1_n3 PREHOOK: type: DROP_MATERIALIZED_VIEW @@ -534,10 +534,10 @@ STAGE PLANS: TableScan alias: default.mv1_n3 filterExpr: (deptno > 10) (type: boolean) - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (deptno > 10) (type: boolean) - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(s) keys: deptno (type: int) @@ -655,14 +655,14 @@ STAGE PLANS: TableScan alias: default.mv1_n3 filterExpr: (deptno > 10) (type: boolean) - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (deptno > 10) (type: boolean) - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: deptno (type: int), s (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out index 9992409f6a..8d633aa6af 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out @@ -19,12 +19,12 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@emps_n2 PREHOOK: query: insert into emps_n2 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@emps_n2 POSTHOOK: query: insert into emps_n2 values (100, 10, 'Bill', 
10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@emps_n2 @@ -94,11 +94,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dependents_n1 -PREHOOK: query: insert into dependents_n1 values (10, 'Michael'), (10, 'Jane') +PREHOOK: query: insert into dependents_n1 values (10, 'Michael'), (20, 'Jane') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dependents_n1 -POSTHOOK: query: insert into dependents_n1 values (10, 'Michael'), (10, 'Jane') +POSTHOOK: query: insert into dependents_n1 values (10, 'Michael'), (20, 'Jane') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@dependents_n1 @@ -128,11 +128,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@locations_n1 -PREHOOK: query: insert into locations_n1 values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: query: insert into locations_n1 values (10, 'San Francisco'), (20, 'San Diego') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@locations_n1 -POSTHOOK: query: insert into locations_n1 values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: query: insert into locations_n1 values (10, 'San Francisco'), (20, 'San Diego') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@locations_n1 @@ -612,11 +612,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.mv1_n1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: deptno (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash @@ -735,7 +735,8 @@ POSTHOOK: Input: default@emps_n2 POSTHOOK: Input: default@mv1_n1 #### A masked pattern was here #### 10 100 100 1 -10 110 220 2 +10 110 110 1 +10 120 120 1 10 150 150 1 20 200 200 1 PREHOOK: query: drop materialized view mv1_n1 @@ -799,67 +800,33 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: depts_n1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptno (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 4 Map Operator Tree: TableScan alias: default.mv1_n1 - Statistics: Num rows: 1 Data 
size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: empid (type: int), deptno (type: int), s (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: double) + expressions: empid (type: int), s (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col3) - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1079,7 +1046,7 @@ POSTHOOK: query: drop materialized view mv1_n1 POSTHOOK: type: DROP_MATERIALIZED_VIEW POSTHOOK: Input: default@mv1_n1 POSTHOOK: Output: default@mv1_n1 -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: create materialized view mv1_n1 as select a.empid deptno from (select * from emps_n2 where empid = 1) a @@ -1161,7 +1128,7 @@ POSTHOOK: query: drop materialized view mv1_n1 POSTHOOK: type: DROP_MATERIALIZED_VIEW POSTHOOK: Input: default@mv1_n1 POSTHOOK: Output: default@mv1_n1 -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: create materialized view mv1_n1 as select a.empid, a.deptno from (select * from emps_n2 where empid = 1) a @@ -1310,7 +1277,7 @@ POSTHOOK: query: drop materialized view mv1_n1 POSTHOOK: type: DROP_MATERIALIZED_VIEW POSTHOOK: Input: default@mv1_n1 POSTHOOK: Output: default@mv1_n1 -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a 
cross product PREHOOK: query: create materialized view mv1_n1 as select emps_n2.empid, emps_n2.deptno from emps_n2 join depts_n1 on (emps_n2.deptno = depts_n1.deptno) @@ -1392,7 +1359,7 @@ POSTHOOK: query: drop materialized view mv1_n1 POSTHOOK: type: DROP_MATERIALIZED_VIEW POSTHOOK: Input: default@mv1_n1 POSTHOOK: Output: default@mv1_n1 -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: create materialized view mv1_n1 as select emps_n2.empid, emps_n2.deptno from emps_n2 join depts_n1 a on (emps_n2.deptno=a.deptno) diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_6.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_6.q.out index 544c395c01..ae433872a2 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_6.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_6.q.out @@ -19,12 +19,12 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@emps PREHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@emps POSTHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@emps @@ -94,11 +94,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dependents -PREHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +PREHOOK: query: insert into dependents values (10, 'Michael'), (20, 'Jane') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dependents -POSTHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +POSTHOOK: query: insert into dependents values (10, 'Michael'), (20, 'Jane') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@dependents @@ -128,11 +128,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@locations -PREHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: query: insert into locations values (10, 'San Francisco'), (20, 'San Diego') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@locations -POSTHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: query: insert into locations values (10, 'San Francisco'), (20, 'San Diego') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@locations @@ -795,82 +795,22 @@ join depts a on (emps.deptno=a.deptno) where emps.name = 'Bill' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - 
Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptno (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 3 - Map Operator Tree: - TableScan - alias: default.mv1 - filterExpr: (CAST( name1 AS STRING) = 'Bill') (type: boolean) - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (CAST( name1 AS STRING) = 'Bill') (type: boolean) - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: empid (type: int), deptno (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col1 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: default.mv1 + filterExpr: (CAST( name1 AS STRING) = 'Bill') (type: boolean) + Filter Operator + predicate: (CAST( name1 AS STRING) = 'Bill') (type: boolean) + Select Operator + expressions: empid (type: int), empid (type: int), deptno (type: int) + outputColumnNames: _col0, _col1, _col2 + ListSink PREHOOK: query: select emps.empid, dependents.empid, emps.deptno from emps diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out index 1e44104ac5..4d9c6d6bea 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out @@ -94,11 +94,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: 
default@dependents_n4 -PREHOOK: query: insert into dependents_n4 values (10, 'Michael'), (10, 'Jane') +PREHOOK: query: insert into dependents_n4 values (10, 'Michael'), (20, 'Jane') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dependents_n4 -POSTHOOK: query: insert into dependents_n4 values (10, 'Michael'), (10, 'Jane') +POSTHOOK: query: insert into dependents_n4 values (10, 'Michael'), (20, 'Jane') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@dependents_n4 @@ -128,11 +128,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@locations_n4 -PREHOOK: query: insert into locations_n4 values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: query: insert into locations_n4 values (10, 'San Francisco'), (20, 'San Diego') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@locations_n4 -POSTHOOK: query: insert into locations_n4 values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: query: insert into locations_n4 values (10, 'San Francisco'), (20, 'San Diego') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@locations_n4 @@ -255,11 +255,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 10 <- Union 4 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 5 <- Union 4 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + Map 10 <- Union 5 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 6 <- Union 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -304,7 +304,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 7 + Map Operator Tree: + TableScan + alias: depts_n6 + filterExpr: ((deptno > 10) and (deptno <= 11) and name is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((deptno <= 11) and (deptno > 10) and name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 8 Map Operator Tree: TableScan alias: locations_n4 @@ -324,27 +345,6 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 7 - Map Operator Tree: - TableScan - alias: depts_n6 - filterExpr: ((deptno > 10) and (deptno <= 11) and name is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE 
Column stats: COMPLETE - Filter Operator - predicate: ((deptno <= 11) and (deptno > 10) and name is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptno (type: int), name (type: varchar(256)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: varchar(256)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) Map 9 Map Operator Tree: TableScan @@ -374,12 +374,33 @@ STAGE PLANS: Inner Join 0 to 2 keys: 0 _col1 (type: varchar(256)) - 1 _col0 (type: varchar(256)) - 2 _col1 (type: varchar(256)) - outputColumnNames: _col0, _col3 + 1 _col1 (type: varchar(256)) + 2 _col0 (type: varchar(256)) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int), _col3 (type: int) + keys: _col1 (type: int), _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -388,7 +409,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -406,7 +427,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -421,25 +442,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: varchar(256)) - sort order: + - Map-reduce partition columns: _col1 (type: varchar(256)) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Union 4 - Vertex: Union 4 + Union 5 + Vertex: Union 5 Stage: Stage-0 Fetch 
Operator @@ -550,11 +554,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 5 <- Union 4 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 6 <- Union 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -601,7 +605,28 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: all inputs - Map 6 + Map 7 + Map Operator Tree: + TableScan + alias: depts_n6 + filterExpr: ((deptno > 10) and (deptno < 20) and ((deptno <= 11) or (deptno >= 19)) and name is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((deptno <= 11) or (deptno >= 19)) and (deptno < 20) and (deptno > 10) and name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 8 Map Operator Tree: TableScan alias: locations_n4 @@ -621,7 +646,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 7 + Map 9 Map Operator Tree: TableScan alias: emps_n8 @@ -642,27 +667,6 @@ STAGE PLANS: value expressions: _col1 (type: float) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan - alias: depts_n6 - filterExpr: ((deptno > 10) and (deptno < 20) and ((deptno <= 11) or (deptno >= 19)) and name is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((deptno <= 11) or (deptno >= 19)) and (deptno < 20) and (deptno > 10) and name is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptno (type: int), name (type: varchar(256)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: varchar(256)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) Reducer 11 Execution mode: llap Reduce Operator Tree: @@ -693,13 +697,34 @@ STAGE PLANS: Inner Join 0 to 2 keys: 0 _col1 (type: varchar(256)) - 1 _col0 (type: varchar(256)) - 2 _col3 (type: varchar(256)) - 
outputColumnNames: _col0, _col4 + 1 _col1 (type: varchar(256)) + 2 _col0 (type: varchar(256)) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col4) - keys: _col0 (type: int) + aggregations: count(_col3) + keys: _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE @@ -709,7 +734,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -730,7 +755,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -750,25 +775,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: varchar(256)) - sort order: + - Map-reduce partition columns: _col3 (type: varchar(256)) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: float) - Union 4 - Vertex: Union 4 + Union 5 + Vertex: Union 5 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out new file mode 100644 index 0000000000..43fc1c4a23 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out @@ -0,0 +1,755 @@ +PREHOOK: query: create table emps_n30 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps_n30 +POSTHOOK: query: create table emps_n30 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps_n30 
+PREHOOK: query: insert into emps_n30 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@emps_n30 +POSTHOOK: query: insert into emps_n30 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps_n30 +POSTHOOK: Lineage: emps_n30.commission SCRIPT [] +POSTHOOK: Lineage: emps_n30.deptno SCRIPT [] +POSTHOOK: Lineage: emps_n30.empid SCRIPT [] +POSTHOOK: Lineage: emps_n30.name SCRIPT [] +POSTHOOK: Lineage: emps_n30.salary SCRIPT [] +PREHOOK: query: analyze table emps_n30 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: default@emps_n30 +#### A masked pattern was here #### +POSTHOOK: query: analyze table emps_n30 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: default@emps_n30 +#### A masked pattern was here #### +PREHOOK: query: create table depts_n20 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts_n20 +POSTHOOK: query: create table depts_n20 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts_n20 +PREHOOK: query: insert into depts_n20 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts_n20 +POSTHOOK: query: insert into depts_n20 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts_n20 +POSTHOOK: Lineage: depts_n20.deptno SCRIPT [] +POSTHOOK: Lineage: depts_n20.locationid SCRIPT [] +POSTHOOK: Lineage: depts_n20.name SCRIPT [] +PREHOOK: query: analyze table depts_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@depts_n20 +PREHOOK: Output: default@depts_n20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table depts_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@depts_n20 +POSTHOOK: Output: default@depts_n20 +#### A masked pattern was here #### +PREHOOK: query: create table dependents_n20 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dependents_n20 +POSTHOOK: query: create table dependents_n20 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dependents_n20 +PREHOOK: query: insert into dependents_n20 values (10, 'Michael'), (20, 'Jane') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dependents_n20 +POSTHOOK: query: insert into dependents_n20 values (10, 'Michael'), (20, 'Jane') +POSTHOOK: type: QUERY 
+POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dependents_n20 +POSTHOOK: Lineage: dependents_n20.empid SCRIPT [] +POSTHOOK: Lineage: dependents_n20.name SCRIPT [] +PREHOOK: query: analyze table dependents_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@dependents_n20 +PREHOOK: Output: default@dependents_n20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table dependents_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@dependents_n20 +POSTHOOK: Output: default@dependents_n20 +#### A masked pattern was here #### +PREHOOK: query: create table locations_n20 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@locations_n20 +POSTHOOK: query: create table locations_n20 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@locations_n20 +PREHOOK: query: insert into locations_n20 values (10, 'San Francisco'), (20, 'San Diego') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@locations_n20 +POSTHOOK: query: insert into locations_n20 values (10, 'San Francisco'), (20, 'San Diego') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@locations_n20 +POSTHOOK: Lineage: locations_n20.locationid SCRIPT [] +POSTHOOK: Lineage: locations_n20.name SCRIPT [] +PREHOOK: query: analyze table locations_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@locations_n20 +PREHOOK: Output: default@locations_n20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table locations_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@locations_n20 +POSTHOOK: Output: default@locations_n20 +#### A masked pattern was here #### +PREHOOK: query: alter table emps_n30 add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps_n30 add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts_n20 add constraint pk2 primary key (deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts_n20 add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table dependents_n20 add constraint pk3 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table dependents_n20 add constraint pk3 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table locations_n20 add constraint pk4 primary key (locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table locations_n20 add constraint pk4 primary key (locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps_n30 add constraint fk1 foreign key (deptno) references depts_n20(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps_n30 add constraint fk1 foreign key (deptno) references depts_n20(deptno) disable 
novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts_n20 add constraint fk2 foreign key (locationid) references locations_n20(locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts_n20 add constraint fk2 foreign key (locationid) references locations_n20(locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: create materialized view mv1_n20 as +select deptno, name, salary, commission +from emps_n30 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_n20 +POSTHOOK: query: create materialized view mv1_n20 as +select deptno, name, salary, commission +from emps_n30 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_n20 +PREHOOK: query: analyze table mv1_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_n20 +PREHOOK: Output: default@mv1_n20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_n20 +POSTHOOK: Output: default@mv1_n20 +#### A masked pattern was here #### +PREHOOK: query: explain +select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv1_n20 + filterExpr: deptno is not null (type: boolean) + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptno is not null (type: boolean) + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name (type: varchar(256)), salary (type: float), commission (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(256)), _col2 (type: float), _col3 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: depts_n20 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator 
Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: varchar(256)), _col2 (type: float), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno) +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n20 +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_n20 +#### A masked pattern was here #### +POSTHOOK: query: select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n20 +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_n20 +#### A masked pattern was here #### +Bill 10000.0 1000 +Bill 10000.0 250 +Eric 8000.0 500 +Sebastian 7000.0 NULL +Theodore 10000.0 250 +PREHOOK: query: drop materialized view mv1_n20 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_n20 +PREHOOK: Output: default@mv1_n20 +POSTHOOK: query: drop materialized view mv1_n20 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_n20 +POSTHOOK: Output: default@mv1_n20 +PREHOOK: query: create materialized view mv1_n20 as +select empid, emps_n30.deptno, count(*) as c, sum(empid) as s +from emps_n30 join depts_n20 using (deptno) +group by empid, emps_n30.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts_n20 +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_n20 +POSTHOOK: query: create materialized view mv1_n20 as +select empid, emps_n30.deptno, count(*) as c, sum(empid) as s +from emps_n30 join depts_n20 using (deptno) +group by empid, emps_n30.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts_n20 +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_n20 +PREHOOK: query: analyze table mv1_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_n20 +PREHOOK: Output: default@mv1_n20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_n20 +POSTHOOK: Output: default@mv1_n20 +#### A masked pattern was here #### +PREHOOK: query: explain +select depts_n20.deptno, count(*) as c, sum(empid) as s +from emps_n30 join depts_n20 using (deptno) +group by depts_n20.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select depts_n20.deptno, count(*) as c, sum(empid) as s +from emps_n30 join depts_n20 using (deptno) +group by depts_n20.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + 
Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv1_n20 + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), c (type: bigint), s (type: bigint) + outputColumnNames: deptno, c, s + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0(c), sum(s) + keys: deptno (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select depts_n20.deptno, count(*) as c, sum(empid) as s +from emps_n30 join depts_n20 using (deptno) +group by depts_n20.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n20 +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_n20 +#### A masked pattern was here #### +POSTHOOK: query: select depts_n20.deptno, count(*) as c, sum(empid) as s +from emps_n30 join depts_n20 using (deptno) +group by depts_n20.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n20 +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_n20 +#### A masked pattern was here #### +10 4 480 +20 1 200 +PREHOOK: query: drop materialized view mv1_n20 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_n20 +PREHOOK: Output: default@mv1_n20 +POSTHOOK: query: drop materialized view mv1_n20 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_n20 +POSTHOOK: Output: default@mv1_n20 +PREHOOK: query: create materialized view mv1_n20 as +select dependents_n20.empid, emps_n30.deptno, sum(salary) as s +from emps_n30 +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +group by dependents_n20.empid, emps_n30.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents_n20 +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_n20 +POSTHOOK: query: create materialized view mv1_n20 as +select dependents_n20.empid, emps_n30.deptno, sum(salary) as s +from emps_n30 +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +group by dependents_n20.empid, emps_n30.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents_n20 +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_n20 +PREHOOK: query: analyze 
table mv1_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_n20 +PREHOOK: Output: default@mv1_n20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_n20 +POSTHOOK: Output: default@mv1_n20 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents_n20.empid, sum(salary) as s +from emps_n30 +join depts_n20 on (emps_n30.deptno = depts_n20.deptno) +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +group by dependents_n20.empid +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents_n20.empid, sum(salary) as s +from emps_n30 +join depts_n20 on (emps_n30.deptno = depts_n20.deptno) +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +group by dependents_n20.empid +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: depts_n20 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: default.mv1_n20 + filterExpr: deptno is not null (type: boolean) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptno is not null (type: boolean) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int), s (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col3) + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + 
aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents_n20.empid, sum(salary) as s +from emps_n30 +join depts_n20 on (emps_n30.deptno = depts_n20.deptno) +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +group by dependents_n20.empid +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents_n20 +PREHOOK: Input: default@depts_n20 +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_n20 +#### A masked pattern was here #### +POSTHOOK: query: select dependents_n20.empid, sum(salary) as s +from emps_n30 +join depts_n20 on (emps_n30.deptno = depts_n20.deptno) +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +group by dependents_n20.empid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents_n20 +POSTHOOK: Input: default@depts_n20 +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_n20 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1_n20 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_n20 +PREHOOK: Output: default@mv1_n20 +POSTHOOK: query: drop materialized view mv1_n20 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_n20 +POSTHOOK: Output: default@mv1_n20 +PREHOOK: query: create materialized view mv1_n20 as +select emps_n30.empid, emps_n30.deptno, emps_n30.name as name1, emps_n30.salary, emps_n30.commission, dependents_n20.name as name2 +from emps_n30 join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents_n20 +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_n20 +POSTHOOK: query: create materialized view mv1_n20 as +select emps_n30.empid, emps_n30.deptno, emps_n30.name as name1, emps_n30.salary, emps_n30.commission, dependents_n20.name as name2 +from emps_n30 join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents_n20 +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_n20 +PREHOOK: query: analyze table mv1_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_n20 +PREHOOK: Output: default@mv1_n20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_n20 +POSTHOOK: Output: default@mv1_n20 +#### A masked pattern was here #### +PREHOOK: query: explain +select emps_n30.empid, dependents_n20.empid, emps_n30.deptno +from emps_n30 +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +join depts_n20 a on (emps_n30.deptno=a.deptno) +where emps_n30.name = 'Bill' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select emps_n30.empid, dependents_n20.empid, emps_n30.deptno +from emps_n30 +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +join 
depts_n20 a on (emps_n30.deptno=a.deptno) +where emps_n30.name = 'Bill' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 3 + Map Operator Tree: + TableScan + alias: default.mv1_n20 + filterExpr: ((CAST( name1 AS STRING) = 'Bill') and deptno is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((CAST( name1 AS STRING) = 'Bill') and deptno is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select emps_n30.empid, dependents_n20.empid, emps_n30.deptno +from emps_n30 +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +join depts_n20 a on (emps_n30.deptno=a.deptno) +where emps_n30.name = 'Bill' +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents_n20 +PREHOOK: Input: default@depts_n20 +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_n20 +#### A masked pattern was here #### +POSTHOOK: query: select emps_n30.empid, dependents_n20.empid, emps_n30.deptno +from emps_n30 +join dependents_n20 on (emps_n30.empid = dependents_n20.empid) +join depts_n20 a on (emps_n30.deptno=a.deptno) +where emps_n30.name = 'Bill' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents_n20 +POSTHOOK: Input: 
default@depts_n20 +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_n20 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1_n20 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_n20 +PREHOOK: Output: default@mv1_n20 +POSTHOOK: query: drop materialized view mv1_n20 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_n20 +POSTHOOK: Output: default@mv1_n20 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt_2.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt_2.q.out new file mode 100644 index 0000000000..283e55d907 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt_2.q.out @@ -0,0 +1,1154 @@ +PREHOOK: query: create table emps_n30 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps_n30 +POSTHOOK: query: create table emps_n30 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps_n30 +PREHOOK: query: insert into emps_n30 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@emps_n30 +POSTHOOK: query: insert into emps_n30 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps_n30 +POSTHOOK: Lineage: emps_n30.commission SCRIPT [] +POSTHOOK: Lineage: emps_n30.deptno SCRIPT [] +POSTHOOK: Lineage: emps_n30.empid SCRIPT [] +POSTHOOK: Lineage: emps_n30.name SCRIPT [] +POSTHOOK: Lineage: emps_n30.salary SCRIPT [] +PREHOOK: query: analyze table emps_n30 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: default@emps_n30 +#### A masked pattern was here #### +POSTHOOK: query: analyze table emps_n30 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: default@emps_n30 +#### A masked pattern was here #### +PREHOOK: query: create table depts_n20 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts_n20 +POSTHOOK: query: create table depts_n20 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts_n20 +PREHOOK: query: insert into depts_n20 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts_n20 +POSTHOOK: query: insert into depts_n20 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts_n20 
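[Editor note] The plan above keeps the join against depts_n20: as the file name suggests, these *_no_join_opt_* outputs appear to exercise materialized view rewriting with the new constraint-based join optimization disabled. For orientation, the pattern that optimization targets is the RELY'd PK/FK pair declared in the setup below. A minimal HiveQL sketch, with hypothetical table and constraint names:

create table depts_sk (deptno int, name varchar(256));
create table emps_sk (empid int, deptno int, name varchar(256));
alter table depts_sk add constraint pk_sk primary key (deptno) disable novalidate rely;
alter table emps_sk add constraint fk_sk foreign key (deptno)
  references depts_sk (deptno) disable novalidate rely;
-- No depts_sk column is projected, and for every non-NULL deptno the FK
-- guarantees exactly one matching depts_sk row, so the optimizer may replace
-- the join with a "deptno is not null" filter over emps_sk alone.
explain
select emps_sk.empid, emps_sk.name
from emps_sk join depts_sk on (emps_sk.deptno = depts_sk.deptno);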
+POSTHOOK: Lineage: depts_n20.deptno SCRIPT [] +POSTHOOK: Lineage: depts_n20.locationid SCRIPT [] +POSTHOOK: Lineage: depts_n20.name SCRIPT [] +PREHOOK: query: analyze table depts_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@depts_n20 +PREHOOK: Output: default@depts_n20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table depts_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@depts_n20 +POSTHOOK: Output: default@depts_n20 +#### A masked pattern was here #### +PREHOOK: query: create table dependents_n20 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dependents_n20 +POSTHOOK: query: create table dependents_n20 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dependents_n20 +PREHOOK: query: insert into dependents_n20 values (10, 'Michael'), (20, 'Jane') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dependents_n20 +POSTHOOK: query: insert into dependents_n20 values (10, 'Michael'), (20, 'Jane') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dependents_n20 +POSTHOOK: Lineage: dependents_n20.empid SCRIPT [] +POSTHOOK: Lineage: dependents_n20.name SCRIPT [] +PREHOOK: query: analyze table dependents_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@dependents_n20 +PREHOOK: Output: default@dependents_n20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table dependents_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@dependents_n20 +POSTHOOK: Output: default@dependents_n20 +#### A masked pattern was here #### +PREHOOK: query: create table locations_n20 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@locations_n20 +POSTHOOK: query: create table locations_n20 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@locations_n20 +PREHOOK: query: insert into locations_n20 values (10, 'San Francisco'), (20, 'San Diego') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@locations_n20 +POSTHOOK: query: insert into locations_n20 values (10, 'San Francisco'), (20, 'San Diego') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@locations_n20 +POSTHOOK: Lineage: locations_n20.locationid SCRIPT [] +POSTHOOK: Lineage: locations_n20.name SCRIPT [] +PREHOOK: query: analyze table locations_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@locations_n20 +PREHOOK: Output: default@locations_n20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table locations_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@locations_n20 +POSTHOOK: Output: default@locations_n20 +#### A masked pattern was here #### +PREHOOK: query: alter table emps_n30 add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: 
ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps_n30 add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts_n20 add constraint pk2 primary key (deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts_n20 add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table dependents_n20 add constraint pk3 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table dependents_n20 add constraint pk3 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table locations_n20 add constraint pk4 primary key (locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table locations_n20 add constraint pk4 primary key (locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps_n30 add constraint fk1 foreign key (deptno) references depts_n20(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps_n30 add constraint fk1 foreign key (deptno) references depts_n20(deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts_n20 add constraint fk2 foreign key (locationid) references locations_n20(locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts_n20 add constraint fk2 foreign key (locationid) references locations_n20(locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: create materialized view mv1_part_n2 partitioned on (deptno) as +select * from emps_n30 where empid < 150 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: create materialized view mv1_part_n2 partitioned on (deptno) as +select * from emps_n30 where empid < 150 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@deptno=10 +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).commission SIMPLE [(emps_n30)emps_n30.FieldSchema(name:commission, type:int, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).empid SIMPLE [(emps_n30)emps_n30.FieldSchema(name:empid, type:int, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).name SIMPLE [(emps_n30)emps_n30.FieldSchema(name:name, type:varchar(256), comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +PREHOOK: query: analyze table mv1_part_n2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@deptno=10 +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2@deptno=10 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_part_n2 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@deptno=10 +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: 
default@mv1_part_n2@deptno=10 +#### A masked pattern was here #### +PREHOOK: query: explain +select * +from (select * from emps_n30 where empid < 120) t +join depts_n20 using (deptno) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from (select * from emps_n30 where empid < 120) t +join depts_n20 using (deptno) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv1_part_n2 + filterExpr: (empid < 120) (type: boolean) + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (empid < 120) (type: boolean) + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), name (type: varchar(256)), salary (type: float), commission (type: int), deptno (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: varchar(256)), _col2 (type: float), _col3 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: depts_n20 + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name (type: varchar(256)), locationid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(256)), _col2 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: deptno (int) + Target Input: default.mv1_part_n2 + Partition key expr: deptno + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: int), _col0 (type: int), _col1 (type: varchar(256)), _col2 (type: float), _col3 (type: int), _col6 (type: varchar(256)), _col7 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 198 
Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from (select * from emps_n30 where empid < 120) t +join depts_n20 using (deptno) +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n20 +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@deptno=10 +#### A masked pattern was here #### +POSTHOOK: query: select * +from (select * from emps_n30 where empid < 120) t +join depts_n20 using (deptno) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n20 +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@deptno=10 +#### A masked pattern was here #### +10 100 Bill 10000.0 1000 Sales 10 +10 110 Theodore 10000.0 250 Sales 10 +PREHOOK: query: drop materialized view mv1_part_n2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: drop materialized view mv1_part_n2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2 +PREHOOK: query: create materialized view mv1_part_n2 partitioned on (deptno) as +select deptno, name, salary, commission +from emps_n30 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: create materialized view mv1_part_n2 partitioned on (deptno) as +select deptno, name, salary, commission +from emps_n30 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@deptno=10 +POSTHOOK: Output: default@mv1_part_n2@deptno=20 +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).commission SIMPLE [(emps_n30)emps_n30.FieldSchema(name:commission, type:int, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).name SIMPLE [(emps_n30)emps_n30.FieldSchema(name:name, type:varchar(256), comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=20).commission SIMPLE [(emps_n30)emps_n30.FieldSchema(name:commission, type:int, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=20).name SIMPLE [(emps_n30)emps_n30.FieldSchema(name:name, type:varchar(256), comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=20).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +PREHOOK: query: analyze table mv1_part_n2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@deptno=10 +PREHOOK: Input: default@mv1_part_n2@deptno=20 +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2@deptno=10 +PREHOOK: Output: default@mv1_part_n2@deptno=20 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_part_n2 compute 
statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@deptno=10 +POSTHOOK: Input: default@mv1_part_n2@deptno=20 +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@deptno=10 +POSTHOOK: Output: default@mv1_part_n2@deptno=20 +#### A masked pattern was here #### +PREHOOK: query: explain +select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv1_part_n2 + filterExpr: deptno is not null (type: boolean) + Statistics: Num rows: 5 Data size: 505 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: name (type: varchar(256)), salary (type: float), commission (type: int), deptno (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 505 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 5 Data size: 505 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: varchar(256)), _col1 (type: float), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: depts_n20 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: deptno (int) + Target Input: default.mv1_part_n2 + Partition key expr: deptno + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno) +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n20 +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@deptno=10 +PREHOOK: Input: default@mv1_part_n2@deptno=20 +#### A masked pattern was here #### +POSTHOOK: query: select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n20 +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@deptno=10 +POSTHOOK: Input: default@mv1_part_n2@deptno=20 +#### A masked pattern was here #### +Bill 10000.0 1000 +Bill 10000.0 250 +Eric 8000.0 500 +Sebastian 7000.0 NULL +Theodore 10000.0 250 +PREHOOK: query: drop materialized view mv1_part_n2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: drop materialized view mv1_part_n2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2 +PREHOOK: query: create materialized view mv1_part_n2 partitioned on (deptno) as +select * from emps_n30 where empid < 200 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: create materialized view mv1_part_n2 partitioned on (deptno) as +select * from emps_n30 where empid < 200 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@deptno=10 +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).commission SIMPLE [(emps_n30)emps_n30.FieldSchema(name:commission, type:int, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).empid SIMPLE [(emps_n30)emps_n30.FieldSchema(name:empid, type:int, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).name SIMPLE [(emps_n30)emps_n30.FieldSchema(name:name, type:varchar(256), comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(deptno=10).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +PREHOOK: query: analyze table mv1_part_n2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@deptno=10 +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2@deptno=10 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_part_n2 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@deptno=10 +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@deptno=10 +#### A masked pattern was here #### +PREHOOK: query: explain +select * from emps_n30 where empid > 120 +union all select * from emps_n30 where empid < 150 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from emps_n30 where empid > 120 +union all select * from emps_n30 where empid < 150 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + 
Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 3 <- Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: emps_n30 + filterExpr: (empid > 120) (type: boolean) + Statistics: Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (empid > 120) (type: boolean) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int), name (type: varchar(256)), salary (type: float), commission (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 526 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 3 + Map Operator Tree: + TableScan + alias: default.mv1_part_n2 + filterExpr: (empid < 150) (type: boolean) + Statistics: Num rows: 4 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (empid < 150) (type: boolean) + Statistics: Num rows: 4 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int), name (type: varchar(256)), salary (type: float), commission (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 526 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from emps_n30 where empid > 120 +union all select * from emps_n30 where empid < 150 +PREHOOK: type: QUERY +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@deptno=10 +#### A masked pattern was here #### +POSTHOOK: query: select * from emps_n30 where empid > 120 +union all select * from emps_n30 where empid < 150 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@deptno=10 +#### A masked pattern was here #### +100 10 Bill 10000.0 1000 +110 10 Theodore 10000.0 250 +120 10 Bill 10000.0 250 +150 10 Sebastian 7000.0 NULL +200 20 Eric 8000.0 500 +PREHOOK: query: drop materialized view mv1_part_n2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: drop materialized view mv1_part_n2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2 +PREHOOK: query: create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 group by name, salary +PREHOOK: type: 
CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 group by name, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@name=Bill +POSTHOOK: Output: default@mv1_part_n2@name=Eric +POSTHOOK: Output: default@mv1_part_n2@name=Sebastian +POSTHOOK: Output: default@mv1_part_n2@name=Theodore +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Bill).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Eric).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Sebastian).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Theodore).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +PREHOOK: query: analyze table mv1_part_n2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@name=Bill +PREHOOK: Input: default@mv1_part_n2@name=Eric +PREHOOK: Input: default@mv1_part_n2@name=Sebastian +PREHOOK: Input: default@mv1_part_n2@name=Theodore +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2@name=Bill +PREHOOK: Output: default@mv1_part_n2@name=Eric +PREHOOK: Output: default@mv1_part_n2@name=Sebastian +PREHOOK: Output: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_part_n2 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@name=Bill +POSTHOOK: Input: default@mv1_part_n2@name=Eric +POSTHOOK: Input: default@mv1_part_n2@name=Sebastian +POSTHOOK: Input: default@mv1_part_n2@name=Theodore +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@name=Bill +POSTHOOK: Output: default@mv1_part_n2@name=Eric +POSTHOOK: Output: default@mv1_part_n2@name=Sebastian +POSTHOOK: Output: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +PREHOOK: query: explain +select name, salary from emps_n30 group by name, salary +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name, salary from emps_n30 group by name, salary +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1_part_n2 + Select Operator + expressions: name (type: varchar(256)), salary (type: float) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: select name, salary from emps_n30 group by name, salary +PREHOOK: type: QUERY +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@name=Bill +PREHOOK: Input: default@mv1_part_n2@name=Eric +PREHOOK: Input: default@mv1_part_n2@name=Sebastian +PREHOOK: Input: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +POSTHOOK: query: select name, salary from emps_n30 group by name, salary +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_part_n2 
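[Editor note] Because the query matches the view definition exactly, the rewrite above collapses the whole Tez job into a single Stage-0 fetch over default.mv1_part_n2. A minimal sketch of the pattern, reusing the emps_n30 table from this setup (the view name is hypothetical):

create materialized view mv_sketch partitioned on (name) as
select name, salary from emps_n30 group by name, salary;
-- An exact match is answered straight from the view: EXPLAIN degenerates to
-- one Fetch stage scanning the MV, with no group-by job at all.
explain select name, salary from emps_n30 group by name, salary;
-- A coarser roll-up can still use the view, but needs a re-aggregation on
-- top, as the next plan in this file shows.
explain select name from emps_n30 group by name;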
+POSTHOOK: Input: default@mv1_part_n2@name=Bill +POSTHOOK: Input: default@mv1_part_n2@name=Eric +POSTHOOK: Input: default@mv1_part_n2@name=Sebastian +POSTHOOK: Input: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +Bill 10000.0 +Eric 8000.0 +Sebastian 7000.0 +Theodore 10000.0 +PREHOOK: query: drop materialized view mv1_part_n2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: drop materialized view mv1_part_n2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2 +PREHOOK: query: create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 group by name, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 group by name, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@name=Bill +POSTHOOK: Output: default@mv1_part_n2@name=Eric +POSTHOOK: Output: default@mv1_part_n2@name=Sebastian +POSTHOOK: Output: default@mv1_part_n2@name=Theodore +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Bill).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Eric).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Sebastian).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Theodore).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +PREHOOK: query: analyze table mv1_part_n2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@name=Bill +PREHOOK: Input: default@mv1_part_n2@name=Eric +PREHOOK: Input: default@mv1_part_n2@name=Sebastian +PREHOOK: Input: default@mv1_part_n2@name=Theodore +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2@name=Bill +PREHOOK: Output: default@mv1_part_n2@name=Eric +PREHOOK: Output: default@mv1_part_n2@name=Sebastian +PREHOOK: Output: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_part_n2 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@name=Bill +POSTHOOK: Input: default@mv1_part_n2@name=Eric +POSTHOOK: Input: default@mv1_part_n2@name=Sebastian +POSTHOOK: Input: default@mv1_part_n2@name=Theodore +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@name=Bill +POSTHOOK: Output: default@mv1_part_n2@name=Eric +POSTHOOK: Output: default@mv1_part_n2@name=Sebastian +POSTHOOK: Output: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +PREHOOK: query: explain +select name from emps_n30 group by name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name from emps_n30 group by name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv1_part_n2 + Statistics: Num rows: 4 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: name + Statistics: Num rows: 4 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: name (type: varchar(256)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select name from emps_n30 group by name +PREHOOK: type: QUERY +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@name=Bill +PREHOOK: Input: default@mv1_part_n2@name=Eric +PREHOOK: Input: default@mv1_part_n2@name=Sebastian +PREHOOK: Input: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +POSTHOOK: query: select name from emps_n30 group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@name=Bill +POSTHOOK: Input: default@mv1_part_n2@name=Eric +POSTHOOK: Input: default@mv1_part_n2@name=Sebastian +POSTHOOK: Input: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +Bill +Eric +Sebastian +Theodore +PREHOOK: query: drop materialized view mv1_part_n2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: drop materialized view mv1_part_n2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2 +PREHOOK: query: create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 where deptno = 10 group by name, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 where deptno = 10 group by name, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@name=Bill +POSTHOOK: Output: default@mv1_part_n2@name=Sebastian +POSTHOOK: Output: 
default@mv1_part_n2@name=Theodore +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Bill).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Sebastian).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Theodore).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +PREHOOK: query: analyze table mv1_part_n2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@name=Bill +PREHOOK: Input: default@mv1_part_n2@name=Sebastian +PREHOOK: Input: default@mv1_part_n2@name=Theodore +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2@name=Bill +PREHOOK: Output: default@mv1_part_n2@name=Sebastian +PREHOOK: Output: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_part_n2 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@name=Bill +POSTHOOK: Input: default@mv1_part_n2@name=Sebastian +POSTHOOK: Input: default@mv1_part_n2@name=Theodore +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@name=Bill +POSTHOOK: Output: default@mv1_part_n2@name=Sebastian +POSTHOOK: Output: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +PREHOOK: query: explain +select name from emps_n30 where deptno = 10 group by name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name from emps_n30 where deptno = 10 group by name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv1_part_n2 + Statistics: Num rows: 3 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: name + Statistics: Num rows: 3 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: name (type: varchar(256)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select name from emps_n30 where deptno = 10 group by name +PREHOOK: type: 
QUERY +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@name=Bill +PREHOOK: Input: default@mv1_part_n2@name=Sebastian +PREHOOK: Input: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +POSTHOOK: query: select name from emps_n30 where deptno = 10 group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@name=Bill +POSTHOOK: Input: default@mv1_part_n2@name=Sebastian +POSTHOOK: Input: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +Bill +Sebastian +Theodore +PREHOOK: query: drop materialized view mv1_part_n2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: drop materialized view mv1_part_n2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2 +PREHOOK: query: create materialized view mv1_part_n2 partitioned on (name) as +select name, salary, count(*) as c, sum(empid) as s +from emps_n30 group by name, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps_n30 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: create materialized view mv1_part_n2 partitioned on (name) as +select name, salary, count(*) as c, sum(empid) as s +from emps_n30 group by name, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@name=Bill +POSTHOOK: Output: default@mv1_part_n2@name=Eric +POSTHOOK: Output: default@mv1_part_n2@name=Sebastian +POSTHOOK: Output: default@mv1_part_n2@name=Theodore +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Bill).c EXPRESSION [(emps_n30)emps_n30.null, ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Bill).s EXPRESSION [(emps_n30)emps_n30.FieldSchema(name:empid, type:int, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Bill).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Eric).c EXPRESSION [(emps_n30)emps_n30.null, ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Eric).s EXPRESSION [(emps_n30)emps_n30.FieldSchema(name:empid, type:int, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Eric).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Sebastian).c EXPRESSION [(emps_n30)emps_n30.null, ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Sebastian).s EXPRESSION [(emps_n30)emps_n30.FieldSchema(name:empid, type:int, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Sebastian).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Theodore).c EXPRESSION [(emps_n30)emps_n30.null, ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Theodore).s EXPRESSION [(emps_n30)emps_n30.FieldSchema(name:empid, type:int, comment:null), ] +POSTHOOK: Lineage: mv1_part_n2 PARTITION(name=Theodore).salary SIMPLE [(emps_n30)emps_n30.FieldSchema(name:salary, type:float, comment:null), ] +PREHOOK: query: analyze table mv1_part_n2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: 
default@mv1_part_n2@name=Bill +PREHOOK: Input: default@mv1_part_n2@name=Eric +PREHOOK: Input: default@mv1_part_n2@name=Sebastian +PREHOOK: Input: default@mv1_part_n2@name=Theodore +PREHOOK: Output: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2@name=Bill +PREHOOK: Output: default@mv1_part_n2@name=Eric +PREHOOK: Output: default@mv1_part_n2@name=Sebastian +PREHOOK: Output: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1_part_n2 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: default@mv1_part_n2@name=Bill +POSTHOOK: Input: default@mv1_part_n2@name=Eric +POSTHOOK: Input: default@mv1_part_n2@name=Sebastian +POSTHOOK: Input: default@mv1_part_n2@name=Theodore +POSTHOOK: Output: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2@name=Bill +POSTHOOK: Output: default@mv1_part_n2@name=Eric +POSTHOOK: Output: default@mv1_part_n2@name=Sebastian +POSTHOOK: Output: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +PREHOOK: query: explain +select name from emps_n30 group by name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name from emps_n30 group by name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv1_part_n2 + Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: name + Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: name (type: varchar(256)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select name from emps_n30 group by name +PREHOOK: type: QUERY +PREHOOK: Input: default@emps_n30 +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Input: default@mv1_part_n2@name=Bill +PREHOOK: Input: default@mv1_part_n2@name=Eric +PREHOOK: Input: default@mv1_part_n2@name=Sebastian +PREHOOK: Input: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +POSTHOOK: query: select name from emps_n30 group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps_n30 +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Input: 
default@mv1_part_n2@name=Bill +POSTHOOK: Input: default@mv1_part_n2@name=Eric +POSTHOOK: Input: default@mv1_part_n2@name=Sebastian +POSTHOOK: Input: default@mv1_part_n2@name=Theodore +#### A masked pattern was here #### +Bill +Eric +Sebastian +Theodore +PREHOOK: query: drop materialized view mv1_part_n2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_part_n2 +PREHOOK: Output: default@mv1_part_n2 +POSTHOOK: query: drop materialized view mv1_part_n2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_part_n2 +POSTHOOK: Output: default@mv1_part_n2 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_1.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_1.q.out index 3a2ad3daf0..9782cb7fee 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_1.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_1.q.out @@ -19,12 +19,12 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@emps_n30 PREHOOK: query: insert into emps_n30 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@emps_n30 POSTHOOK: query: insert into emps_n30 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), - (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@emps_n30 @@ -94,11 +94,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dependents_n20 -PREHOOK: query: insert into dependents_n20 values (10, 'Michael'), (10, 'Jane') +PREHOOK: query: insert into dependents_n20 values (10, 'Michael'), (20, 'Jane') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dependents_n20 -POSTHOOK: query: insert into dependents_n20 values (10, 'Michael'), (10, 'Jane') +POSTHOOK: query: insert into dependents_n20 values (10, 'Michael'), (20, 'Jane') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@dependents_n20 @@ -128,11 +128,11 @@ stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@locations_n20 -PREHOOK: query: insert into locations_n20 values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: query: insert into locations_n20 values (10, 'San Francisco'), (20, 'San Diego') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@locations_n20 -POSTHOOK: query: insert into locations_n20 values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: query: insert into locations_n20 values (10, 'San Francisco'), (20, 'San Diego') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@locations_n20 @@ -234,16 +234,16 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (empid < 120) 
(type: boolean) - Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: empid (type: int), name (type: varchar(256)), salary (type: float), commission (type: int), deptno (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int) sort order: + Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: varchar(256)), _col2 (type: float), _col3 (type: int) Execution mode: llap LLAP IO: all inputs @@ -289,14 +289,14 @@ STAGE PLANS: 0 _col4 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: int), _col0 (type: int), _col1 (type: varchar(256)), _col2 (type: float), _col3 (type: int), _col6 (type: varchar(256)), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -327,7 +327,6 @@ POSTHOOK: Input: default@mv1_part_n2 POSTHOOK: Input: default@mv1_part_n2@deptno=10 #### A masked pattern was here #### 10 100 Bill 10000.0 1000 Sales 10 -10 110 Bill 10000.0 250 Sales 10 10 110 Theodore 10000.0 250 Sales 10 PREHOOK: query: drop materialized view mv1_part_n2 PREHOOK: type: DROP_MATERIALIZED_VIEW @@ -389,90 +388,20 @@ from emps_n30 join depts_n20 using (deptno) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: default.mv1_part_n2 - filterExpr: deptno is not null (type: boolean) - Statistics: Num rows: 5 Data size: 505 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: name (type: varchar(256)), salary (type: float), commission (type: int), deptno (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 505 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 5 Data size: 505 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: varchar(256)), _col1 (type: float), _col2 (type: 
int) - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: depts_n20 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptno (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: deptno (int) - Target Input: default.mv1_part_n2 - Partition key expr: deptno - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Execution mode: llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: default.mv1_part_n2 + filterExpr: deptno is not null (type: boolean) + Select Operator + expressions: name (type: varchar(256)), salary (type: float), commission (type: int) + outputColumnNames: _col0, _col1, _col2 + ListSink PREHOOK: query: select emps_n30.name, emps_n30.salary, emps_n30.commission from emps_n30 @@ -628,8 +557,8 @@ POSTHOOK: Input: default@mv1_part_n2 POSTHOOK: Input: default@mv1_part_n2@deptno=10 #### A masked pattern was here #### 100 10 Bill 10000.0 1000 -110 10 Bill 10000.0 250 110 10 Theodore 10000.0 250 +120 10 Bill 10000.0 250 150 10 Sebastian 7000.0 NULL 200 20 Eric 8000.0 500 PREHOOK: query: drop materialized view mv1_part_n2 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out index d39180ecd8..b3c7e1fc3f 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out @@ -598,7 +598,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -613,20 +614,67 @@ STAGE PLANS: Select Operator expressions: deptno (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out
index d39180ecd8..b3c7e1fc3f 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out
@@ -598,7 +598,8 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
@@ -613,20 +614,67 @@
                   Select Operator
                     expressions: deptno (type: int)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 4
+            Map Operator Tree:
+                TableScan
+                  alias: depts_n00
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: deptno (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: _col0 (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
-                        Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                        Dynamic Partitioning Event Operator
+                          Target column: deptno (int)
+                          Target Input: default.mv1_part_n0
+                          Partition key expr: deptno
+                          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                          Target Vertex: Map 1
             Execution mode: llap
-            LLAP IO: all inputs
+            LLAP IO: may be used (ACID table)
         Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1
+                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  keys: _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
diff --git a/ql/src/test/results/clientpositive/macro.q.out b/ql/src/test/results/clientpositive/macro.q.out
index 70281acc0b..abd8c0e3d1 100644
--- a/ql/src/test/results/clientpositive/macro.q.out
+++ b/ql/src/test/results/clientpositive/macro.q.out
@@ -41,7 +41,7 @@ PREHOOK: query: EXPLAIN EXTENDED SELECT SIGMOID(2) FROM src LIMIT 1
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN EXTENDED SELECT SIGMOID(2) FROM src LIMIT 1
 POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT CAST(0.8807970779778823 AS DOUBLE) AS `_o__c0`
+OPTIMIZED SQL: SELECT CAST(0.8807970779778823 AS DOUBLE) AS `$f0`
 FROM `default`.`src`
 LIMIT 1
 STAGE DEPENDENCIES:
@@ -114,7 +114,7 @@ PREHOOK: query: EXPLAIN EXTENDED SELECT FIXED_NUMBER() + 1 FROM src LIMIT 1
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN EXTENDED SELECT FIXED_NUMBER() + 1 FROM src LIMIT 1
 POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT CAST(2 AS INTEGER) AS `_o__c0`
+OPTIMIZED SQL: SELECT CAST(2 AS INTEGER) AS `$f0`
 FROM `default`.`src`
 LIMIT 1
 STAGE DEPENDENCIES:
@@ -214,7 +214,7 @@ PREHOOK: query: EXPLAIN EXTENDED SELECT SIMPLE_ADD(1, 9) FROM src LIMIT 1
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN EXTENDED SELECT SIMPLE_ADD(1, 9) FROM src LIMIT 1
 POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT CAST(10 AS INTEGER) AS `_o__c0`
+OPTIMIZED SQL: SELECT CAST(10 AS INTEGER) AS `$f0`
 FROM `default`.`src`
 LIMIT 1
 STAGE DEPENDENCIES:
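
The alias renames in macro.q.out above, and in list_bucket_dml_2.q.out below, touch only the OPTIMIZED SQL debug string that EXPLAIN EXTENDED prints. `_o__c0` was Hive's name for an unnamed output column, while `$f0`, `$f1`, ... are Calcite's default synthetic field names, which suggests the SQL is now unparsed from a plan stage where Calcite's names are still in place. Client-visible column names appear unaffected; a quick illustration, assuming the stock src table and noting that SIGMOID is the temporary macro that test defines:

    -- The $f0 alias lives only in the debug output; the client still sees the
    -- usual auto-generated _c0 column name and the same value.
    SELECT SIGMOID(2) FROM src LIMIT 1;
    -- _c0
    -- 0.8807970779778823
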
diff --git a/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out
index a2a961c043..95849024d9 100644
--- a/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out
@@ -307,7 +307,7 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain extended
 select * from list_bucketing_static_part_n4 where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484'
 POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) AS `value`, CAST('2008-04-08' AS STRING) AS `ds`, CAST('11' AS STRING) AS `hr`
+OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0`, CAST('val_484' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2`, CAST('11' AS STRING) AS `$f3`
 FROM `default`.`list_bucketing_static_part_n4`
 WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484'
 STAGE DEPENDENCIES:
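
When checking these plan diffs by hand, the constraint-driven rewrites can be isolated by toggling the flag this patch introduces (default true). A sketch of such a session, reusing the materialized_view_rewrite_part_1 query from above; with the flag off, the plan should revert to the join shape shown as removed lines in those hunks:

    -- Disabling the new rule should bring back the Merge Join based plan.
    SET hive.optimize.constraints.join=false;
    EXPLAIN
    SELECT emps_n30.name, emps_n30.salary, emps_n30.commission
    FROM emps_n30 JOIN depts_n20 USING (deptno);
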