diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e4910e4..b0fbf06 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -225,6 +225,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   stats_only_null.q,\
   subquery_exists.q,\
   subquery_in.q,\
+  subquery_restrictions.q,\
   temp_table.q,\
   tez_bmj_schema_evolution.q,\
   tez_dml.q,\
@@ -643,6 +644,7 @@ minillaplocal.query.files=acid_globallimit.q,\
   offset_limit_ppd_optimizer.q,\
   cluster.q,\
   subquery_in.q,\
+  subquery_restrictions.q,\
   stats11.q,\
   orc_create.q,\
   orc_split_elimination.q,\
@@ -1321,6 +1323,7 @@ spark.query.files=add_part_multiple.q, \
   statsfs.q, \
   subquery_exists.q, \
   subquery_in.q, \
+  subquery_restrictions.q, \
   subquery_multiinsert.q, \
   table_access_keys_stats.q, \
   temp_table.q, \
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/SubQueryWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/SubQueryWalker.java
new file mode 100644
index 0000000..d2abb57
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/SubQueryWalker.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.lib;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Graph walker that dispatches a node once all of its children are handled,
+ * but does not descend into most children of a {@code TOK_SUBQUERY_EXPR}
+ * node: those children are recorded in {@code retMap} with a {@code null}
+ * result instead of being pushed on the walk stack, because the sub-query is
+ * processed separately later on.
+ */
+public class SubQueryWalker extends DefaultGraphWalker {
+
+  /**
+   * Constructor.
+   *
+   * @param disp
+   *          dispatcher to call for each op encountered
+   */
+  public SubQueryWalker(Dispatcher disp) {
+    super(disp);
+  }
+
+  /**
+   * Tells whether {@code childNode} must be skipped rather than walked.
+   *
+   * @param childNode
+   *          candidate child that has not been dispatched yet
+   * @param parentNode
+   *          node whose children are being examined
+   * @return {@code true} when the parent is a {@code TOK_SUBQUERY_EXPR} AST
+   *         node and the child is not the third child of a three-child
+   *         sub-query expression; {@code false} otherwise
+   */
+  private boolean shouldByPass(Node childNode, Node parentNode) {
+    if (!(parentNode instanceof ASTNode)
+        || ((ASTNode) parentNode).getType() != HiveParser.TOK_SUBQUERY_EXPR) {
+      return false;
+    }
+    ASTNode parentOp = (ASTNode) parentNode;
+    // A sub-query expression is either of the "WHERE lhs IN (subquery)" form
+    // (three children) or of the "WHERE EXISTS (subquery)" form (two children).
+    assert (parentOp.getChildCount() == 2 || parentOp.getChildCount() == 3);
+    // In the IN form the LHS (child at index 2) is not bypassed. This is a
+    // pure identity comparison, so no downcast of childNode is needed.
+    boolean isLhsOfIn =
+        parentOp.getChildCount() == 3 && childNode == parentOp.getChild(2);
+    return !isLhsOfIn;
+  }
+
+  /**
+   * walk the current operator and its descendants.
+   *
+   * @param nd
+   *          current operator in the graph
+   * @throws SemanticException
+   */
+  @Override
+  protected void walk(Node nd) throws SemanticException {
+    // Push the node in the stack
+    opStack.push(nd);
+
+    // While there are still nodes to dispatch...
+    while (!opStack.empty()) {
+      Node node = opStack.peek();
+
+      if (node.getChildren() == null ||
+          getDispatchedList().containsAll(node.getChildren())) {
+        // Dispatch current node
+        if (!getDispatchedList().contains(node)) {
+          dispatch(node, opStack);
+          opQueue.add(node);
+        }
+        opStack.pop();
+        continue;
+      }
+
+      // Add a single child and restart the loop
+      for (Node childNode : node.getChildren()) {
+        if (!getDispatchedList().contains(childNode)) {
+          if (shouldByPass(childNode, node)) {
+            // Record the child without a result so it is not visited again.
+            // NOTE(review): presumably retMap backs getDispatchedList() in
+            // DefaultGraphWalker - confirm against the base class.
+            retMap.put(childNode, null);
+          } else {
+            opStack.push(childNode);
+          }
+          break;
+        }
+      }
+    } // end while
+  }
+
+}
\ No newline at end of file
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttle.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttle.java
new file mode 100644
index 0000000..86bebeb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttle.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttle;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+/**
+ * Visitor that has methods for the common logical relational expressions
+ * and, in addition to {@link RelShuttle}, for their Hive counterparts.
+ * This is required for HiveRelDecorrelator: because a plan can contain a mix
+ * of HiveProject, LogicalProject etc., we need an interface which can handle
+ * all of them.
+ */
+public interface HiveRelShuttle extends RelShuttle {
+
+  /** Visits a {@link HiveProject}. */
+  RelNode visit(HiveProject project);
+  /** Visits a {@link HiveFilter}. */
+  RelNode visit(HiveFilter filter);
+  /** Visits a {@link HiveJoin}. */
+  RelNode visit(HiveJoin join);
+  /** Visits a {@link HiveAggregate}. */
+  RelNode visit(HiveAggregate aggregate);
+}
+
+// End HiveRelShuttle.java
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
new file mode 100644
index 0000000..b92a4f3
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.TableFunctionScan;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.logical.LogicalAggregate;
+import org.apache.calcite.rel.logical.LogicalCorrelate;
+import org.apache.calcite.rel.logical.LogicalExchange;
+import org.apache.calcite.rel.logical.LogicalFilter;
+import org.apache.calcite.rel.logical.LogicalIntersect;
+import org.apache.calcite.rel.logical.LogicalJoin;
+import org.apache.calcite.rel.logical.LogicalMinus;
+import org.apache.calcite.rel.logical.LogicalProject;
+import org.apache.calcite.rel.logical.LogicalSort;
+import org.apache.calcite.rel.logical.LogicalUnion;
+import org.apache.calcite.rel.logical.LogicalValues;
+import org.apache.calcite.util.Stacks;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Basic implementation of {@link HiveRelShuttle}: every {@code visit} method
+ * either returns the node unchanged (leaves) or lets each input accept this
+ * shuttle, copying the parent only when an input came back as a different
+ * object.
+ */
+public class HiveRelShuttleImpl implements HiveRelShuttle {
+  /** Parents of the node being visited; maintained by {@link #visitChild}. */
+  protected final List<RelNode> stack = new ArrayList<RelNode>();
+
+  /**
+   * Visits a particular child of a parent.
+   *
+   * @param parent node whose input is visited
+   * @param i      ordinal of the input inside {@code parent}
+   * @param child  the input itself
+   * @return {@code parent} if the visited child is the same object,
+   *         otherwise a copy of {@code parent} with input {@code i} replaced
+   */
+  protected RelNode visitChild(RelNode parent, int i, RelNode child) {
+    Stacks.push(stack, parent);
+    try {
+      RelNode child2 = child.accept(this);
+      if (child2 != child) {
+        final List<RelNode> newInputs =
+            new ArrayList<RelNode>(parent.getInputs());
+        newInputs.set(i, child2);
+        return parent.copy(parent.getTraitSet(), newInputs);
+      }
+      return parent;
+    } finally {
+      // keep the stack balanced even when a visit throws
+      Stacks.pop(stack, parent);
+    }
+  }
+
+  /**
+   * Visits every input of {@code rel} in order; {@code rel} is replaced by
+   * the copy returned from {@link #visitChild} as soon as an input changes.
+   */
+  protected RelNode visitChildren(RelNode rel) {
+    for (Ord<RelNode> input : Ord.zip(rel.getInputs())) {
+      rel = visitChild(rel, input.i, input.e);
+    }
+    return rel;
+  }
+
+  @Override
+  public RelNode visit(LogicalAggregate aggregate) {
+    return visitChild(aggregate, 0, aggregate.getInput());
+  }
+
+  @Override
+  public RelNode visit(HiveAggregate aggregate) {
+    return visitChild(aggregate, 0, aggregate.getInput());
+  }
+
+  @Override
+  public RelNode visit(TableScan scan) {
+    return scan;
+  }
+
+  @Override
+  public RelNode visit(TableFunctionScan scan) {
+    return visitChildren(scan);
+  }
+
+  @Override
+  public RelNode visit(LogicalValues values) {
+    return values;
+  }
+
+  @Override
+  public RelNode visit(HiveFilter filter) {
+    return visitChild(filter, 0, filter.getInput());
+  }
+
+  @Override
+  public RelNode visit(LogicalFilter filter) {
+    return visitChild(filter, 0, filter.getInput());
+  }
+
+  @Override
+  public RelNode visit(HiveProject project) {
+    return visitChild(project, 0, project.getInput());
+  }
+
+  @Override
+  public RelNode visit(LogicalProject project) {
+    return visitChild(project, 0, project.getInput());
+  }
+
+  @Override
+  public RelNode visit(LogicalJoin join) {
+    return visitChildren(join);
+  }
+
+  @Override
+  public RelNode visit(HiveJoin join) {
+    return visitChildren(join);
+  }
+
+  @Override
+  public RelNode visit(LogicalCorrelate correlate) {
+    return visitChildren(correlate);
+  }
+
+  @Override
+  public RelNode visit(LogicalUnion union) {
+    return visitChildren(union);
+  }
+
+  @Override
+  public RelNode visit(LogicalIntersect intersect) {
+    return visitChildren(intersect);
+  }
+
+  @Override
+  public RelNode visit(LogicalMinus minus) {
+    return visitChildren(minus);
+  }
+
+  @Override
+  public RelNode visit(LogicalSort sort) {
+    return visitChildren(sort);
+  }
+
+  @Override
+  public RelNode visit(LogicalExchange exchange) {
+    return visitChildren(exchange);
+  }
+
+  @Override
+  public RelNode visit(RelNode other) {
+    return visitChildren(other);
+  }
+}
+
+// End HiveRelShuttleImpl.java
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
index 0410c91..b68eacf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
@@ -22,10 +22,14 @@
 import org.apache.calcite.plan.RelOptPlanner;
 import org.apache.calcite.plan.RelTraitSet;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttle;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
-import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.*;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle;
 import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.calcite.rel.core.CorrelationId;
+import java.util.*;
 
 public class HiveFilter extends Filter implements HiveRelNode {
 
@@ -48,4 +52,70 @@ public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
     return mq.getNonCumulativeCost(this);
   }
 
+  /**
+   * Collects into {@code allVars} the id of every correlation variable that
+   * is reached through a field access among the operands of {@code node},
+   * recursing into nested calls. Nodes that are not a {@code RexCall} are
+   * ignored.
+   */
+  private void findCorrelatedVar(RexNode node, Set<CorrelationId> allVars)
+  {
+    if(node instanceof RexCall)
+    {
+      RexCall nd = (RexCall)node;
+      for (RexNode rn : nd.getOperands()) {
+        if (rn instanceof RexFieldAccess)
+        {
+          final RexNode ref = ((RexFieldAccess) rn).getReferenceExpr();
+          // A field access can also reference something other than a
+          // correlation variable; test the type instead of relying on an
+          // assert followed by an unchecked cast.
+          if (ref instanceof RexCorrelVariable)
+          {
+            allVars.add(((RexCorrelVariable) ref).id);
+          }
+        }
+        else {
+          findCorrelatedVar(rn, allVars);
+        }
+      }
+    }
+  }
+
+  //traverse the given node to find all correlated variables
+  // Note that correlated variables are supported in Filter only i.e.
+  // Where & Having
+  private void traverseFilter(RexNode node, Set<CorrelationId> allVars)
+  {
+    if(node instanceof RexSubQuery)
+    {
+      //we expect correlated variables in HiveFilter only for now.
+      RelNode subQueryRoot = ((RexSubQuery)node).rel;
+      // Also check for case where operator has 0 inputs e.g. TableScan:
+      // there is nothing below such a root to search.
+      if(subQueryRoot.getInputs().isEmpty())
+      {
+        return;
+      }
+      // walk down the single-input chain until the first HiveFilter
+      RelNode input = subQueryRoot.getInput(0);
+      while( input != null && !(input instanceof HiveFilter) && input.getInputs().size() >=1)
+      {
+        //we don't expect JOINs
+        assert(input.getInputs().size() == 1);
+        input = input.getInput(0);
+      }
+      if(input != null && input instanceof HiveFilter )
+      {
+        findCorrelatedVar(((HiveFilter)input).getCondition(), allVars);
+      }
+      return;
+    }
+    //AND, NOT etc
+    if(node instanceof RexCall)
+    {
+      int numOperands = ((RexCall)node).getOperands().size();
+      for(int i=0; i<numOperands; i++)
+      {
+        traverseFilter(((RexCall)node).getOperands().get(i), allVars);
+      }
+    }
+  }
+
+  /**
+   * Returns the ids of the correlation variables found by
+   * {@code traverseFilter} in this filter's condition.
+   */
+  @Override
+  public Set<CorrelationId> getVariablesSet() {
+    Set<CorrelationId> allCorrVars = new HashSet<>();
+    traverseFilter(condition, allCorrVars);
+    return allCorrVars;
+  }
+
+  /**
+   * Dispatches to the {@code HiveFilter} overload when the shuttle is a
+   * {@link HiveRelShuttle}, and to the generic {@code RelShuttle} visit
+   * otherwise.
+   */
+  @Override
+  public RelNode accept(RelShuttle shuttle) {
+    if (shuttle instanceof HiveRelShuttle)
+    {
+      return ((HiveRelShuttle)shuttle).visit(this);
+    }
+    return shuttle.visit(this);
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
index ba9483e..b175189 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
@@ -35,6 +35,7 @@
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.RelShuttle;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.util.ImmutableBitSet;
@@ -43,6 +44,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
 import
org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle;
 import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel.JoinAlgorithm;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel.DefaultJoinAlgorithm;
@@ -230,4 +232,13 @@ public RelWriter explainTerms(RelWriter pw) {
         .item("cost", joinCost == null ? "not available" : joinCost);
   }
+
+  //required for HiveRelDecorrelator
+  // Dispatches to the HiveJoin overload when the shuttle is a HiveRelShuttle,
+  // and to the generic RelShuttle visit otherwise.
+  @Override
+  public RelNode accept(RelShuttle shuttle) {
+    if (shuttle instanceof HiveRelShuttle)
+    {
+      return ((HiveRelShuttle)shuttle).visit(this);
+    }
+    return shuttle.visit(this);
+  }
 
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
index 3e0a9a6..7935e96 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
@@ -27,6 +27,7 @@
 import org.apache.calcite.plan.RelTraitSet;
 import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttle;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
@@ -40,6 +41,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle;
 import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
 
 import com.google.common.collect.ImmutableList;
@@ -196,4 +198,13 @@ public boolean isSynthetic() {
     return isSysnthetic;
   }
 
+  //required for
+  // HiveRelDecorrelator
+  // Uses the HiveProject overload when the shuttle is a HiveRelShuttle and
+  // the generic RelShuttle visit otherwise.
+  @Override
+  public RelNode accept(RelShuttle shuttle) {
+    return shuttle instanceof HiveRelShuttle
+        ? ((HiveRelShuttle) shuttle).visit(this)
+        : shuttle.visit(this);
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
new file mode 100644
index 0000000..c5736d1
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -0,0 +1,2941 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.linq4j.function.Function2; +import org.apache.calcite.plan.Context; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCostImpl; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.BiRel; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.*; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalCorrelate; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rel.logical.LogicalJoin; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rel.metadata.RelMdUtil; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.rules.FilterCorrelateRule; +import org.apache.calcite.rel.rules.FilterJoinRule; +import org.apache.calcite.rel.rules.FilterProjectTransposeRule; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitorImpl; +import 
org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlCountAggFunction; +import org.apache.calcite.sql.fun.SqlSingleValueAggFunction; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.Bug; +import org.apache.calcite.util.Holder; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Litmus; +import org.apache.calcite.util.Pair; +import org.apache.calcite.util.ReflectUtil; +import org.apache.calcite.util.ReflectiveVisitor; +import org.apache.calcite.util.Stacks; +import org.apache.calcite.util.Util; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.ImmutableSortedMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import com.google.common.collect.Multimaps; +import com.google.common.collect.Sets; +import com.google.common.collect.SortedSetMultimap; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttleImpl; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; 
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.logging.Level;
+
+/**
+ * NOTE: this whole logic is replicated from Calcite's RelDecorrelator
+ * and is extended to make it suitable for HIVE.
+ * RelDecorrelator replaces all correlated expressions (corExp) in a relational
+ * expression (RelNode) tree with non-correlated expressions that are produced
+ * from joining the RelNode that produces the corExp with the RelNode that
+ * references it.
+ *
+ * <p>TODO:</p>
+ * <ul>
+ * <li>replace {@code CorelMap} constructor parameter with a RelNode</li>
+ * <li>make {@link #currentRel} immutable (would require a fresh
+ * RelDecorrelator for each node being decorrelated)</li>
+ * <li>make fields of {@code CorelMap} immutable</li>
+ * <li>make sub-class rules static, and have them create their own
+ * de-correlator</li>
+ * </ul>
+ */
+public class HiveRelDecorrelator implements ReflectiveVisitor {
+  //~ Static fields/initializers ---------------------------------------------
+
+  protected static final Logger LOG = LoggerFactory.getLogger(
+      HiveRelDecorrelator.class);
+
+  //~ Instance fields --------------------------------------------------------
+
+  private final RelBuilder relBuilder;
+
+  // map built during translation
+  private CorelMap cm;
+
+  // reflective dispatcher that picks the decorrelateRel overload matching
+  // the runtime type of a RelNode; every overload returns a Frame
+  private final ReflectUtil.MethodDispatcher<Frame> dispatcher =
+      ReflectUtil.createMethodDispatcher(Frame.class, this, "decorrelateRel",
+          RelNode.class);
+
+  private final RexBuilder rexBuilder;
+
+  // The rel which is being visited
+  private RelNode currentRel;
+
+  private final Context context;
+
+  /** Built during decorrelation, of rel to all the newly created correlated
+   * variables in its output, and to map old input positions to new input
+   * positions. This is from the view point of the parent rel of a new rel. */
+  private final Map<RelNode, Frame> map = new HashMap<>();
+
+  // correlates produced by this decorrelator; kept in sync by the copy hook
+  private final HashSet<LogicalCorrelate> generatedCorRels = Sets.newHashSet();
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a decorrelator; instances are only created by
+   * {@code decorrelateQuery}.
+   *
+   * @param cluster cluster supplying the {@code RexBuilder} and used to
+   *                create the logical {@code RelBuilder}
+   * @param cm      correlation map of the query being rewritten
+   * @param context planner context handed to the Hep planners created later
+   */
+  private HiveRelDecorrelator (
+      RelOptCluster cluster,
+      CorelMap cm,
+      Context context) {
+    this.cm = cm;
+    this.rexBuilder = cluster.getRexBuilder();
+    this.context = context;
+    relBuilder = RelFactories.LOGICAL_BUILDER.create(cluster, null);
+
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  /** Decorrelates a query.
+   *
+   * <p>
This is the main entry point to {@code RelDecorrelator}.
+   *
+   * @param rootRel Root node of the query
+   *
+   * @return Equivalent query with all
+   * {@link org.apache.calcite.rel.logical.LogicalCorrelate} instances removed
+   */
+  public static RelNode decorrelateQuery(RelNode rootRel) {
+    final CorelMap corelMap = new CorelMapBuilder().build(rootRel);
+    if (!corelMap.hasCorrelation()) {
+      // nothing to decorrelate: hand back the very same tree
+      return rootRel;
+    }
+
+    final RelOptCluster cluster = rootRel.getCluster();
+    final HiveRelDecorrelator decorrelator =
+        new HiveRelDecorrelator(cluster, corelMap,
+            cluster.getPlanner().getContext());
+
+    // first try the rule-based removal ...
+    RelNode newRootRel = decorrelator.removeCorrelationViaRule(rootRel);
+
+    // ... and run the general rewrite only if correlations are still mapped
+    if (!decorrelator.cm.mapCorVarToCorRel.isEmpty()) {
+      newRootRel = decorrelator.decorrelate(newRootRel);
+    }
+
+    return newRootRel;
+  }
+
+  // Remembers corRel as the rel being visited and, when it is non-null,
+  // rebuilds the correlation map from Util.first(root, corRel).
+  private void setCurrent(RelNode root, LogicalCorrelate corRel) {
+    currentRel = corRel;
+    if (corRel != null) {
+      cm = new CorelMapBuilder().build(Util.first(root, corRel));
+    }
+  }
+
+  // General rewrite: a preparatory Hep pass, then the frame-based
+  // decorrelation via getInvoke, then a clean-up Hep pass on the result.
+  // Returns the pre-processed root unchanged when no frame was produced.
+  private RelNode decorrelate(RelNode root) {
+    // first adjust count() expression if any
+    HepProgram program = HepProgram.builder()
+        .addRuleInstance(new AdjustProjectForCountAggregateRule(false))
+        .addRuleInstance(new AdjustProjectForCountAggregateRule(true))
+        .addRuleInstance(FilterJoinRule.FILTER_ON_JOIN)
+        .addRuleInstance(FilterProjectTransposeRule.INSTANCE)
+        .addRuleInstance(FilterCorrelateRule.INSTANCE)
+        .build();
+
+    HepPlanner planner = createPlanner(program);
+
+    planner.setRoot(root);
+    root = planner.findBestExp();
+
+    // Perform decorrelation.
+    map.clear();
+
+    final Frame frame = getInvoke(root, null);
+    if (frame != null) {
+      // has been rewritten; apply rules post-decorrelation
+      final HepProgram program2 = HepProgram.builder()
+          .addRuleInstance(FilterJoinRule.FILTER_ON_JOIN)
+          .addRuleInstance(FilterJoinRule.JOIN)
+          .build();
+
+      final HepPlanner planner2 = createPlanner(program2);
+      final RelNode newRoot = frame.r;
+      planner2.setRoot(newRoot);
+      return planner2.findBestExp();
+    }
+
+    return root;
+  }
+
+  // Hook invoked with (oldNode, newNode) when a node is copied: carries the
+  // entries of cm and generatedCorRels over from the old node to its copy.
+  private Function2<RelNode, RelNode, Void> createCopyHook() {
+    return new Function2<RelNode, RelNode, Void>() {
+      public Void apply(RelNode oldNode, RelNode newNode) {
+        if (cm.mapRefRelToCorVar.containsKey(oldNode)) {
+          cm.mapRefRelToCorVar.putAll(newNode,
+              cm.mapRefRelToCorVar.get(oldNode));
+        }
+        if (oldNode instanceof LogicalCorrelate
+            && newNode instanceof LogicalCorrelate) {
+          LogicalCorrelate oldCor = (LogicalCorrelate) oldNode;
+          CorrelationId c = oldCor.getCorrelationId();
+          if (cm.mapCorVarToCorRel.get(c) == oldNode) {
+            cm.mapCorVarToCorRel.put(c, newNode);
+          }
+
+          if (generatedCorRels.contains(oldNode)) {
+            generatedCorRels.add((LogicalCorrelate) newNode);
+          }
+        }
+        return null;
+      }
+    };
+  }
+
+  private HepPlanner createPlanner(HepProgram program) {
+    // Create a planner with a hook to update the mapping tables when a
+    // node is copied when it is registered.
+    return new HepPlanner(
+        program,
+        context,
+        true,
+        createCopyHook(),
+        RelOptCostImpl.FACTORY);
+  }
+
+  /**
+   * Runs the rule-based part of the rewrite: a Hep program made of
+   * {@code RemoveSingleAggregateRule},
+   * {@code RemoveCorrelationForScalarProjectRule} and
+   * {@code RemoveCorrelationForScalarAggregateRule}.
+   *
+   * @param root tree to rewrite
+   * @return the planner's best expression for {@code root}
+   */
+  public RelNode removeCorrelationViaRule(RelNode root) {
+    HepProgram program = HepProgram.builder()
+        .addRuleInstance(new RemoveSingleAggregateRule())
+        .addRuleInstance(new RemoveCorrelationForScalarProjectRule())
+        .addRuleInstance(new RemoveCorrelationForScalarAggregateRule())
+        .build();
+
+    HepPlanner planner = createPlanner(program);
+
+    planner.setRoot(root);
+    return planner.findBestExp();
+  }
+
+  /** Rewrites {@code exp} with a fresh {@code DecorrelateRexShuttle}. */
+  protected RexNode decorrelateExpr(RexNode exp) {
+    DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle();
+    return exp.accept(shuttle);
+  }
+
+  /**
+   * Rewrites {@code exp} with a {@code RemoveCorrelationRexShuttle} that has
+   * no null indicator and an empty set of count positions.
+   */
+  protected RexNode removeCorrelationExpr(
+      RexNode exp,
+      boolean projectPulledAboveLeftCorrelator) {
+    RemoveCorrelationRexShuttle shuttle =
+        new RemoveCorrelationRexShuttle(rexBuilder,
+            projectPulledAboveLeftCorrelator, null, ImmutableSet.<Integer>of());
+    return exp.accept(shuttle);
+  }
+
+  /**
+   * Same as the two-argument overload, but passes {@code nullIndicator} to
+   * the shuttle.
+   */
+  protected RexNode removeCorrelationExpr(
+      RexNode exp,
+      boolean projectPulledAboveLeftCorrelator,
+      RexInputRef nullIndicator) {
+    RemoveCorrelationRexShuttle shuttle =
+        new RemoveCorrelationRexShuttle(rexBuilder,
+            projectPulledAboveLeftCorrelator, nullIndicator,
+            ImmutableSet.<Integer>of());
+    return exp.accept(shuttle);
+  }
+
+  /**
+   * Same as the two-argument overload, but passes {@code isCount} to the
+   * shuttle.
+   */
+  protected RexNode removeCorrelationExpr(
+      RexNode exp,
+      boolean projectPulledAboveLeftCorrelator,
+      Set<Integer> isCount) {
+    RemoveCorrelationRexShuttle shuttle =
+        new RemoveCorrelationRexShuttle(rexBuilder,
+            projectPulledAboveLeftCorrelator, null, isCount);
+    return exp.accept(shuttle);
+  }
+
+  /** Fallback if none of the other {@code decorrelateRel} methods match.
*/
+  public Frame decorrelateRel(RelNode rel) {
+    RelNode newRel = rel.copy(rel.getTraitSet(), rel.getInputs());
+
+    if (rel.getInputs().size() > 0) {
+      List<RelNode> oldInputs = rel.getInputs();
+      List<RelNode> newInputs = Lists.newArrayList();
+      for (int i = 0; i < oldInputs.size(); ++i) {
+        final Frame frame = getInvoke(oldInputs.get(i), rel);
+        if (frame == null || !frame.corVarOutputPos.isEmpty()) {
+          // if input is not rewritten, or if it produces correlated
+          // variables, terminate rewrite
+          return null;
+        }
+        newInputs.add(frame.r);
+        newRel.replaceInput(i, frame.r);
+      }
+
+      // copy once more only when at least one input really changed
+      if (!Util.equalShallow(oldInputs, newInputs)) {
+        newRel = rel.copy(rel.getTraitSet(), newInputs);
+      }
+    }
+
+    // the output position should not change since there are no corVars
+    // coming from below.
+    return register(rel, newRel, identityMap(rel.getRowType().getFieldCount()),
+        ImmutableSortedMap.of());
+  }
+
+  /**
+   * Rewrite Sort.
+   *
+   * @param rel Sort to be rewritten
+   * @return the frame registered for the new sort, or {@code null} when the
+   *         input was not rewritten
+   */
+  public Frame decorrelateRel(Sort rel) {
+    //
+    // Rewrite logic:
+    //
+    // 1. change the collations field to reference the new input.
+    //
+
+    // Sort itself should not reference cor vars.
+    assert !cm.mapRefRelToCorVar.containsKey(rel);
+
+    // Sort only references field positions in collations field.
+    // The collations field in the newRel now need to refer to the
+    // new output positions in its input.
+    // Its output does not change the input ordering, so there's no
+    // need to call propagateExpr.
+
+    final RelNode oldInput = rel.getInput();
+    final Frame frame = getInvoke(oldInput, rel);
+    if (frame == null) {
+      // If input has not been rewritten, do not rewrite this rel.
+      return null;
+    }
+    final RelNode newInput = frame.r;
+
+    // old-field-position -> new-field-position mapping of the input
+    Mappings.TargetMapping mapping =
+        Mappings.target(
+            frame.oldToNewOutputPos,
+            oldInput.getRowType().getFieldCount(),
+            newInput.getRowType().getFieldCount());
+
+    RelCollation oldCollation = rel.getCollation();
+    RelCollation newCollation = RexUtil.apply(mapping, oldCollation);
+
+    final Sort newSort =
+        LogicalSort.create(newInput, newCollation, rel.offset, rel.fetch);
+
+    // Sort does not change input ordering
+    return register(rel, newSort, frame.oldToNewOutputPos,
+        frame.corVarOutputPos);
+  }
+
+  /**
+   * Rewrites a {@link Values}.
+   *
+   * @param rel Values to be rewritten
+   * @return always {@code null}
+   */
+  public Frame decorrelateRel(Values rel) {
+    // There are no inputs, so rel does not need to be changed.
+    return null;
+  }
+
+  /**
+   * Rewrites a {@link LogicalAggregate}.
+   *
+   * @param rel Aggregate to rewrite
+   */
+  public Frame decorrelateRel(LogicalAggregate rel) {
+    if (rel.getGroupType() != Aggregate.Group.SIMPLE) {
+      throw new AssertionError(Bug.CALCITE_461_FIXED);
+    }
+    //
+    // Rewrite logic:
+    //
+    // 1. Permute the group by keys to the front.
+    // 2. If the input of an aggregate produces correlated variables,
+    //    add them to the group list.
+    // 3. Change aggCalls to reference the new project.
+    //
+
+    // Aggregate itself should not reference cor vars.
+    assert !cm.mapRefRelToCorVar.containsKey(rel);
+
+    final RelNode oldInput = rel.getInput();
+    final Frame frame = getInvoke(oldInput, rel);
+    if (frame == null) {
+      // If input has not been rewritten, do not rewrite this rel.
+ return null; + } + + //I think this is a bug in Calcite where Aggregate seems to always expect + // correlated variable in nodes underneath it which is not true for queries such as + // select p.empno, li.mgr from (select distinct empno as empno from emp) p join emp li on p.empno= li.empno where li.sal = 1 + // and li.deptno in (select deptno from emp where JOB = 'AIR' AND li.mgr=mgr) + + //assert !frame.corVarOutputPos.isEmpty(); + final RelNode newInput = frame.r; + + // map from newInput + Map mapNewInputToProjOutputPos = Maps.newHashMap(); + final int oldGroupKeyCount = rel.getGroupSet().cardinality(); + + // Project projects the original expressions, + // plus any correlated variables the input wants to pass along. + final List> projects = Lists.newArrayList(); + + List newInputOutput = + newInput.getRowType().getFieldList(); + + int newPos = 0; + + // oldInput has the original group by keys in the front. + final NavigableMap omittedConstants = new TreeMap<>(); + for (int i = 0; i < oldGroupKeyCount; i++) { + final RexLiteral constant = projectedLiteral(newInput, i); + if (constant != null) { + // Exclude constants. Aggregate({true}) occurs because Aggregate({}) + // would generate 1 row even when applied to an empty table. + omittedConstants.put(i, constant); + continue; + } + int newInputPos = frame.oldToNewOutputPos.get(i); + projects.add(RexInputRef.of2(newInputPos, newInputOutput)); + mapNewInputToProjOutputPos.put(newInputPos, newPos); + newPos++; + } + + final SortedMap mapCorVarToOutputPos = new TreeMap<>(); + if (!frame.corVarOutputPos.isEmpty()) { + // If input produces correlated variables, move them to the front, + // right after any existing GROUP BY fields. + + // Now add the corVars from the input, starting from + // position oldGroupKeyCount. 
+ for (Map.Entry entry + : frame.corVarOutputPos.entrySet()) { + projects.add(RexInputRef.of2(entry.getValue(), newInputOutput)); + + mapCorVarToOutputPos.put(entry.getKey(), newPos); + mapNewInputToProjOutputPos.put(entry.getValue(), newPos); + newPos++; + } + } + + // add the remaining fields + final int newGroupKeyCount = newPos; + for (int i = 0; i < newInputOutput.size(); i++) { + if (!mapNewInputToProjOutputPos.containsKey(i)) { + projects.add(RexInputRef.of2(i, newInputOutput)); + mapNewInputToProjOutputPos.put(i, newPos); + newPos++; + } + } + + assert newPos == newInputOutput.size(); + + // This Project will be what the old input maps to, + // replacing any previous mapping from old input). + RelNode newProject = + RelOptUtil.createProject(newInput, projects, false); + + // update mappings: + // oldInput ----> newInput + // + // newProject + // | + // oldInput ----> newInput + // + // is transformed to + // + // oldInput ----> newProject + // | + // newInput + Map combinedMap = Maps.newHashMap(); + + for (Integer oldInputPos : frame.oldToNewOutputPos.keySet()) { + combinedMap.put(oldInputPos, + mapNewInputToProjOutputPos.get( + frame.oldToNewOutputPos.get(oldInputPos))); + } + + register(oldInput, newProject, combinedMap, mapCorVarToOutputPos); + + // now it's time to rewrite the Aggregate + final ImmutableBitSet newGroupSet = ImmutableBitSet.range(newGroupKeyCount); + List newAggCalls = Lists.newArrayList(); + List oldAggCalls = rel.getAggCallList(); + + int oldInputOutputFieldCount = rel.getGroupSet().cardinality(); + int newInputOutputFieldCount = newGroupSet.cardinality(); + + int i = -1; + for (AggregateCall oldAggCall : oldAggCalls) { + ++i; + List oldAggArgs = oldAggCall.getArgList(); + + List aggArgs = Lists.newArrayList(); + + // Adjust the aggregator argument positions. + // Note aggregator does not change input ordering, so the input + // output position mapping can be used to derive the new positions + // for the argument. 
+ for (int oldPos : oldAggArgs) { + aggArgs.add(combinedMap.get(oldPos)); + } + final int filterArg = oldAggCall.filterArg < 0 ? oldAggCall.filterArg + : combinedMap.get(oldAggCall.filterArg); + + newAggCalls.add( + oldAggCall.adaptTo(newProject, aggArgs, filterArg, + oldGroupKeyCount, newGroupKeyCount)); + + // The old to new output position mapping will be the same as that + // of newProject, plus any aggregates that the oldAgg produces. + combinedMap.put( + oldInputOutputFieldCount + i, + newInputOutputFieldCount + i); + } + + relBuilder.push( + LogicalAggregate.create(newProject, + false, + newGroupSet, + null, + newAggCalls)); + + if (!omittedConstants.isEmpty()) { + final List postProjects = new ArrayList<>(relBuilder.fields()); + for (Map.Entry entry + : omittedConstants.descendingMap().entrySet()) { + postProjects.add(entry.getKey() + frame.corVarOutputPos.size(), + entry.getValue()); + } + relBuilder.project(postProjects); + } + + // Aggregate does not change input ordering so corVars will be + // located at the same position as the input newProject. + return register(rel, relBuilder.build(), combinedMap, mapCorVarToOutputPos); + } + + public Frame getInvoke(RelNode r, RelNode parent) { + final Frame frame = dispatcher.invoke(r); + if (frame != null) { + map.put(r, frame); + } + currentRel = parent; + return frame; + } + + /** Returns a literal output field, or null if it is not literal. */ + private static RexLiteral projectedLiteral(RelNode rel, int i) { + if (rel instanceof Project) { + final Project project = (Project) rel; + final RexNode node = project.getProjects().get(i); + if (node instanceof RexLiteral) { + return (RexLiteral) node; + } + } + return null; + } + + public Frame decorrelateRel(HiveAggregate rel) throws SemanticException{ + { + if (rel.getGroupType() != Aggregate.Group.SIMPLE) { + throw new AssertionError(Bug.CALCITE_461_FIXED); + } + // + // Rewrite logic: + // + // 1. Permute the group by keys to the front. + // 2. 
If the input of an aggregate produces correlated variables, + // add them to the group list. + // 3. Change aggCalls to reference the new project. + // + + // Aggregate itself should not reference cor vars. + assert !cm.mapRefRelToCorVar.containsKey(rel); + + final RelNode oldInput = rel.getInput(); + final Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + //assert !frame.corVarOutputPos.isEmpty(); + final RelNode newInput = frame.r; + + // map from newInput + Map mapNewInputToProjOutputPos = Maps.newHashMap(); + final int oldGroupKeyCount = rel.getGroupSet().cardinality(); + + // Project projects the original expressions, + // plus any correlated variables the input wants to pass along. + final List> projects = Lists.newArrayList(); + + List newInputOutput = + newInput.getRowType().getFieldList(); + + int newPos = 0; + + // oldInput has the original group by keys in the front. + final NavigableMap omittedConstants = new TreeMap<>(); + for (int i = 0; i < oldGroupKeyCount; i++) { + final RexLiteral constant = projectedLiteral(newInput, i); + if (constant != null) { + // Exclude constants. Aggregate({true}) occurs because Aggregate({}) + // would generate 1 row even when applied to an empty table. + omittedConstants.put(i, constant); + continue; + } + int newInputPos = frame.oldToNewOutputPos.get(i); + projects.add(RexInputRef.of2(newInputPos, newInputOutput)); + mapNewInputToProjOutputPos.put(newInputPos, newPos); + newPos++; + } + + final SortedMap mapCorVarToOutputPos = new TreeMap<>(); + if (!frame.corVarOutputPos.isEmpty()) { + // If input produces correlated variables, move them to the front, + // right after any existing GROUP BY fields. + + // Now add the corVars from the input, starting from + // position oldGroupKeyCount. 
+ for (Map.Entry entry + : frame.corVarOutputPos.entrySet()) { + projects.add(RexInputRef.of2(entry.getValue(), newInputOutput)); + + mapCorVarToOutputPos.put(entry.getKey(), newPos); + mapNewInputToProjOutputPos.put(entry.getValue(), newPos); + newPos++; + } + } + + // add the remaining fields + final int newGroupKeyCount = newPos; + for (int i = 0; i < newInputOutput.size(); i++) { + if (!mapNewInputToProjOutputPos.containsKey(i)) { + projects.add(RexInputRef.of2(i, newInputOutput)); + mapNewInputToProjOutputPos.put(i, newPos); + newPos++; + } + } + + assert newPos == newInputOutput.size(); + + // This Project will be what the old input maps to, + // replacing any previous mapping from old input). + RelNode newProject = + RelOptUtil.createProject(newInput, projects, false); + + // update mappings: + // oldInput ----> newInput + // + // newProject + // | + // oldInput ----> newInput + // + // is transformed to + // + // oldInput ----> newProject + // | + // newInput + Map combinedMap = Maps.newHashMap(); + + for (Integer oldInputPos : frame.oldToNewOutputPos.keySet()) { + combinedMap.put(oldInputPos, + mapNewInputToProjOutputPos.get( + frame.oldToNewOutputPos.get(oldInputPos))); + } + + register(oldInput, newProject, combinedMap, mapCorVarToOutputPos); + + // now it's time to rewrite the Aggregate + final ImmutableBitSet newGroupSet = ImmutableBitSet.range(newGroupKeyCount); + List newAggCalls = Lists.newArrayList(); + List oldAggCalls = rel.getAggCallList(); + + int oldInputOutputFieldCount = rel.getGroupSet().cardinality(); + int newInputOutputFieldCount = newGroupSet.cardinality(); + + int i = -1; + for (AggregateCall oldAggCall : oldAggCalls) { + ++i; + List oldAggArgs = oldAggCall.getArgList(); + + List aggArgs = Lists.newArrayList(); + + // Adjust the aggregator argument positions. + // Note aggregator does not change input ordering, so the input + // output position mapping can be used to derive the new positions + // for the argument. 
+ for (int oldPos : oldAggArgs) { + aggArgs.add(combinedMap.get(oldPos)); + } + final int filterArg = oldAggCall.filterArg < 0 ? oldAggCall.filterArg + : combinedMap.get(oldAggCall.filterArg); + + newAggCalls.add( + oldAggCall.adaptTo(newProject, aggArgs, filterArg, + oldGroupKeyCount, newGroupKeyCount)); + + // The old to new output position mapping will be the same as that + // of newProject, plus any aggregates that the oldAgg produces. + combinedMap.put( + oldInputOutputFieldCount + i, + newInputOutputFieldCount + i); + } + + relBuilder.push( + new HiveAggregate(rel.getCluster(), rel.getTraitSet(), newProject, false, newGroupSet, null, newAggCalls) ); + + if (!omittedConstants.isEmpty()) { + final List postProjects = new ArrayList<>(relBuilder.fields()); + for (Map.Entry entry + : omittedConstants.descendingMap().entrySet()) { + postProjects.add(entry.getKey() + frame.corVarOutputPos.size(), + entry.getValue()); + } + relBuilder.project(postProjects); + } + + // Aggregate does not change input ordering so corVars will be + // located at the same position as the input newProject. + return register(rel, relBuilder.build(), combinedMap, mapCorVarToOutputPos); + } + } + + public Frame decorrelateRel(HiveProject rel) throws SemanticException{ + { + // + // Rewrite logic: + // + // 1. Pass along any correlated variables coming from the input. + // + + final RelNode oldInput = rel.getInput(); + Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + final List oldProjects = rel.getProjects(); + final List relOutput = rel.getRowType().getFieldList(); + + // LogicalProject projects the original expressions, + // plus any correlated variables the input wants to pass along. + final List> projects = Lists.newArrayList(); + + // If this LogicalProject has correlated reference, create value generator + // and produce the correlated variables in the new output. 
+ if (cm.mapRefRelToCorVar.containsKey(rel)) { + decorrelateInputWithValueGenerator(rel); + + // The old input should be mapped to the LogicalJoin created by + // rewriteInputWithValueGenerator(). + frame = map.get(oldInput); + } + + // LogicalProject projects the original expressions + final Map mapOldToNewOutputPos = Maps.newHashMap(); + int newPos; + for (newPos = 0; newPos < oldProjects.size(); newPos++) { + projects.add( + newPos, + Pair.of( + decorrelateExpr(oldProjects.get(newPos)), + relOutput.get(newPos).getName())); + mapOldToNewOutputPos.put(newPos, newPos); + } + + + // Project any correlated variables the input wants to pass along. + // There could be situation e.g. multiple correlated variables refering to + // same outer variable, in which case Project will be created with multiple + // fields with same name. Hive doesn't allow HiveProject with multiple fields + // having same name. So to avoid that we keep a set of all fieldnames and + // on encountering an existing one a new field/column name is generated + final Set corrFieldName = Sets.newHashSet(); + int pos = 0; + + final SortedMap mapCorVarToOutputPos = new TreeMap<>(); + for (Map.Entry entry : frame.corVarOutputPos.entrySet()) { + final RelDataTypeField field = frame.r.getRowType().getFieldList().get(entry.getValue()); + RexNode projectChild = (RexNode) new RexInputRef(entry.getValue(), field.getType()); + String fieldName = field.getName(); + if(corrFieldName.contains(fieldName)) + { + fieldName = SemanticAnalyzer.getColumnInternalName(pos++); + } + + projects.add(Pair.of(projectChild ,fieldName)); + corrFieldName.add(fieldName); + mapCorVarToOutputPos.put(entry.getKey(), newPos); + newPos++; + } + + RelNode newProject = HiveProject.create(frame.r, Pair.left(projects), Pair.right(projects)); + + return register(rel, newProject, mapOldToNewOutputPos, + mapCorVarToOutputPos); + } + } + /** + * Rewrite LogicalProject. 
+ * + * @param rel the project rel to rewrite + */ + public Frame decorrelateRel(LogicalProject rel) { + // + // Rewrite logic: + // + // 1. Pass along any correlated variables coming from the input. + // + + final RelNode oldInput = rel.getInput(); + Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + final List oldProjects = rel.getProjects(); + final List relOutput = rel.getRowType().getFieldList(); + + // LogicalProject projects the original expressions, + // plus any correlated variables the input wants to pass along. + final List> projects = Lists.newArrayList(); + + // If this LogicalProject has correlated reference, create value generator + // and produce the correlated variables in the new output. + if (cm.mapRefRelToCorVar.containsKey(rel)) { + decorrelateInputWithValueGenerator(rel); + + // The old input should be mapped to the LogicalJoin created by + // rewriteInputWithValueGenerator(). + frame = map.get(oldInput); + } + + // LogicalProject projects the original expressions + final Map mapOldToNewOutputPos = Maps.newHashMap(); + int newPos; + for (newPos = 0; newPos < oldProjects.size(); newPos++) { + projects.add( + newPos, + Pair.of( + decorrelateExpr(oldProjects.get(newPos)), + relOutput.get(newPos).getName())); + mapOldToNewOutputPos.put(newPos, newPos); + } + + // Project any correlated variables the input wants to pass along. + final SortedMap mapCorVarToOutputPos = new TreeMap<>(); + for (Map.Entry entry : frame.corVarOutputPos.entrySet()) { + projects.add( + RexInputRef.of2(entry.getValue(), + frame.r.getRowType().getFieldList())); + mapCorVarToOutputPos.put(entry.getKey(), newPos); + newPos++; + } + + RelNode newProject = + RelOptUtil.createProject(frame.r, projects, false); + + return register(rel, newProject, mapOldToNewOutputPos, + mapCorVarToOutputPos); + } + + /** + * Create RelNode tree that produces a list of correlated variables. 
+ * + * @param correlations correlated variables to generate + * @param valueGenFieldOffset offset in the output that generated columns + * will start + * @param mapCorVarToOutputPos output positions for the correlated variables + * generated + * @return RelNode the root of the resultant RelNode tree + */ + private RelNode createValueGenerator( + Iterable correlations, + int valueGenFieldOffset, + SortedMap mapCorVarToOutputPos) { + final Map> mapNewInputToOutputPos = + new HashMap<>(); + + final Map mapNewInputToNewOffset = new HashMap<>(); + + // Input provides the definition of a correlated variable. + // Add to map all the referenced positions (relative to each input rel). + for (Correlation corVar : correlations) { + final int oldCorVarOffset = corVar.field; + + final RelNode oldInput = getCorRel(corVar); + assert oldInput != null; + final Frame frame = map.get(oldInput); + assert frame != null; + final RelNode newInput = frame.r; + + final List newLocalOutputPosList; + if (!mapNewInputToOutputPos.containsKey(newInput)) { + newLocalOutputPosList = Lists.newArrayList(); + } else { + newLocalOutputPosList = + mapNewInputToOutputPos.get(newInput); + } + + final int newCorVarOffset = frame.oldToNewOutputPos.get(oldCorVarOffset); + + // Add all unique positions referenced. + if (!newLocalOutputPosList.contains(newCorVarOffset)) { + newLocalOutputPosList.add(newCorVarOffset); + } + mapNewInputToOutputPos.put(newInput, newLocalOutputPosList); + } + + int offset = 0; + + // Project only the correlated fields out of each inputRel + // and join the projectRel together. + // To make sure the plan does not change in terms of join order, + // join these rels based on their occurrence in cor var list which + // is sorted. 
+ final Set joinedInputRelSet = Sets.newHashSet(); + + RelNode r = null; + for (Correlation corVar : correlations) { + final RelNode oldInput = getCorRel(corVar); + assert oldInput != null; + final RelNode newInput = map.get(oldInput).r; + assert newInput != null; + + if (!joinedInputRelSet.contains(newInput)) { + RelNode project = + RelOptUtil.createProject( + newInput, + mapNewInputToOutputPos.get(newInput)); + RelNode distinct = RelOptUtil.createDistinctRel(project); + RelOptCluster cluster = distinct.getCluster(); + + joinedInputRelSet.add(newInput); + mapNewInputToNewOffset.put(newInput, offset); + offset += distinct.getRowType().getFieldCount(); + + if (r == null) { + r = distinct; + } else { + r = + LogicalJoin.create(r, distinct, + cluster.getRexBuilder().makeLiteral(true), + ImmutableSet.of(), JoinRelType.INNER); + } + } + } + + // Translate the positions of correlated variables to be relative to + // the join output, leaving room for valueGenFieldOffset because + // valueGenerators are joined with the original left input of the rel + // referencing correlated variables. + for (Correlation corVar : correlations) { + // The first input of a Correlator is always the rel defining + // the correlated variables. + final RelNode oldInput = getCorRel(corVar); + assert oldInput != null; + final Frame frame = map.get(oldInput); + final RelNode newInput = frame.r; + assert newInput != null; + + final List newLocalOutputPosList = + mapNewInputToOutputPos.get(newInput); + + final int newLocalOutputPos = frame.oldToNewOutputPos.get(corVar.field); + + // newOutputPos is the index of the cor var in the referenced + // position list plus the offset of referenced position list of + // each newInput. 
+ final int newOutputPos = + newLocalOutputPosList.indexOf(newLocalOutputPos) + + mapNewInputToNewOffset.get(newInput) + + valueGenFieldOffset; + + if (mapCorVarToOutputPos.containsKey(corVar)) { + assert mapCorVarToOutputPos.get(corVar) == newOutputPos; + } + mapCorVarToOutputPos.put(corVar, newOutputPos); + } + + return r; + } + + private RelNode getCorRel(Correlation corVar) { + final RelNode r = cm.mapCorVarToCorRel.get(corVar.corr); + return r.getInput(0); + } + + private void decorrelateInputWithValueGenerator(RelNode rel) { + // currently only handles one input input + assert rel.getInputs().size() == 1; + RelNode oldInput = rel.getInput(0); + final Frame frame = map.get(oldInput); + + final SortedMap mapCorVarToOutputPos = + new TreeMap<>(frame.corVarOutputPos); + + final Collection corVarList = cm.mapRefRelToCorVar.get(rel); + + int leftInputOutputCount = frame.r.getRowType().getFieldCount(); + + // can directly add positions into mapCorVarToOutputPos since join + // does not change the output ordering from the inputs. + RelNode valueGen = + createValueGenerator( + corVarList, + leftInputOutputCount, + mapCorVarToOutputPos); + + RelNode join = + LogicalJoin.create(frame.r, valueGen, rexBuilder.makeLiteral(true), + ImmutableSet.of(), JoinRelType.INNER); + + // LogicalJoin or LogicalFilter does not change the old input ordering. All + // input fields from newLeftInput(i.e. the original input to the old + // LogicalFilter) are in the output and in the same position. + register(oldInput, join, frame.oldToNewOutputPos, mapCorVarToOutputPos); + } + + public Frame decorrelateRel(HiveFilter rel) throws SemanticException { + { + // + // Rewrite logic: + // + // 1. 
If a LogicalFilter references a correlated field in its filter + // condition, rewrite the LogicalFilter to be + // LogicalFilter + // LogicalJoin(cross product) + // OriginalFilterInput + // ValueGenerator(produces distinct sets of correlated variables) + // and rewrite the correlated fieldAccess in the filter condition to + // reference the LogicalJoin output. + // + // 2. If LogicalFilter does not reference correlated variables, simply + // rewrite the filter condition using new input. + // + + final RelNode oldInput = rel.getInput(); + Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + + // If this LogicalFilter has correlated reference, create value generator + // and produce the correlated variables in the new output. + if (cm.mapRefRelToCorVar.containsKey(rel)) { + decorrelateInputWithValueGenerator(rel); + + // The old input should be mapped to the newly created LogicalJoin by + // rewriteInputWithValueGenerator(). + frame = map.get(oldInput); + } + + // Replace the filter expression to reference output of the join + // Map filter to the new filter over join + RelNode newFilter = new HiveFilter(rel.getCluster(), rel.getTraitSet(), frame.r, + decorrelateExpr(rel.getCondition())); + + // Filter does not change the input ordering. + // Filter rel does not permute the input. + // All corvars produced by filter will have the same output positions in the + // input rel. + return register(rel, newFilter, frame.oldToNewOutputPos, + frame.corVarOutputPos); + } + } + + /** + * Rewrite LogicalFilter. + * + * @param rel the filter rel to rewrite + */ + public Frame decorrelateRel(LogicalFilter rel) { + // + // Rewrite logic: + // + // 1. 
If a LogicalFilter references a correlated field in its filter + // condition, rewrite the LogicalFilter to be + // LogicalFilter + // LogicalJoin(cross product) + // OriginalFilterInput + // ValueGenerator(produces distinct sets of correlated variables) + // and rewrite the correlated fieldAccess in the filter condition to + // reference the LogicalJoin output. + // + // 2. If LogicalFilter does not reference correlated variables, simply + // rewrite the filter condition using new input. + // + + final RelNode oldInput = rel.getInput(); + Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + + // If this LogicalFilter has correlated reference, create value generator + // and produce the correlated variables in the new output. + if (cm.mapRefRelToCorVar.containsKey(rel)) { + decorrelateInputWithValueGenerator(rel); + + // The old input should be mapped to the newly created LogicalJoin by + // rewriteInputWithValueGenerator(). + frame = map.get(oldInput); + } + + // Replace the filter expression to reference output of the join + // Map filter to the new filter over join + RelNode newFilter = + RelOptUtil.createFilter( + frame.r, + decorrelateExpr(rel.getCondition())); + + // Filter does not change the input ordering. + // Filter rel does not permute the input. + // All corvars produced by filter will have the same output positions in the + // input rel. + return register(rel, newFilter, frame.oldToNewOutputPos, + frame.corVarOutputPos); + } + + /** + * Rewrite Correlator into a left outer join. + * + * @param rel Correlator + */ + public Frame decorrelateRel(LogicalCorrelate rel) { + // + // Rewrite logic: + // + // The original left input will be joined with the new right input that + // has generated correlated variables propagated up. 
For any generated + // cor vars that are not used in the join key, pass them along to be + // joined later with the CorrelatorRels that produce them. + // + + // the right input to Correlator should produce correlated variables + final RelNode oldLeft = rel.getInput(0); + final RelNode oldRight = rel.getInput(1); + + final Frame leftFrame = getInvoke(oldLeft, rel); + final Frame rightFrame = getInvoke(oldRight, rel); + + if (leftFrame == null || rightFrame == null) { + // If any input has not been rewritten, do not rewrite this rel. + return null; + } + + if (rightFrame.corVarOutputPos.isEmpty()) { + return null; + } + + assert rel.getRequiredColumns().cardinality() + <= rightFrame.corVarOutputPos.keySet().size(); + + // Change correlator rel into a join. + // Join all the correlated variables produced by this correlator rel + // with the values generated and propagated from the right input + final SortedMap corVarOutputPos = + new TreeMap<>(rightFrame.corVarOutputPos); + final List conditions = new ArrayList<>(); + final List newLeftOutput = + leftFrame.r.getRowType().getFieldList(); + int newLeftFieldCount = newLeftOutput.size(); + + final List newRightOutput = + rightFrame.r.getRowType().getFieldList(); + + for (Map.Entry rightOutputPos + : Lists.newArrayList(corVarOutputPos.entrySet())) { + final Correlation corVar = rightOutputPos.getKey(); + if (!corVar.corr.equals(rel.getCorrelationId())) { + continue; + } + final int newLeftPos = leftFrame.oldToNewOutputPos.get(corVar.field); + final int newRightPos = rightOutputPos.getValue(); + conditions.add( + rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + RexInputRef.of(newLeftPos, newLeftOutput), + new RexInputRef(newLeftFieldCount + newRightPos, + newRightOutput.get(newRightPos).getType()))); + + // remove this cor var from output position mapping + corVarOutputPos.remove(corVar); + } + + // Update the output position for the cor vars: only pass on the cor + // vars that are not used in the join key. 
+ for (Correlation corVar : corVarOutputPos.keySet()) { + int newPos = corVarOutputPos.get(corVar) + newLeftFieldCount; + corVarOutputPos.put(corVar, newPos); + } + + // then add any cor var from the left input. Do not need to change + // output positions. + corVarOutputPos.putAll(leftFrame.corVarOutputPos); + + // Create the mapping between the output of the old correlation rel + // and the new join rel + final Map mapOldToNewOutputPos = Maps.newHashMap(); + + int oldLeftFieldCount = oldLeft.getRowType().getFieldCount(); + + int oldRightFieldCount = oldRight.getRowType().getFieldCount(); + assert rel.getRowType().getFieldCount() + == oldLeftFieldCount + oldRightFieldCount; + + // Left input positions are not changed. + mapOldToNewOutputPos.putAll(leftFrame.oldToNewOutputPos); + + // Right input positions are shifted by newLeftFieldCount. + for (int i = 0; i < oldRightFieldCount; i++) { + mapOldToNewOutputPos.put( + i + oldLeftFieldCount, + rightFrame.oldToNewOutputPos.get(i) + newLeftFieldCount); + } + + final RexNode condition = + RexUtil.composeConjunction(rexBuilder, conditions, false); + RelNode newJoin = + LogicalJoin.create(leftFrame.r, rightFrame.r, condition, + ImmutableSet.of(), rel.getJoinType().toJoinType()); + + return register(rel, newJoin, mapOldToNewOutputPos, corVarOutputPos); + } + + public Frame decorrelateRel(HiveJoin rel) throws SemanticException{ + // + // Rewrite logic: + // + // 1. rewrite join condition. + // 2. map output positions and produce cor vars if any. + // + + final RelNode oldLeft = rel.getInput(0); + final RelNode oldRight = rel.getInput(1); + + final Frame leftFrame = getInvoke(oldLeft, rel); + final Frame rightFrame = getInvoke(oldRight, rel); + + if (leftFrame == null || rightFrame == null) { + // If any input has not been rewritten, do not rewrite this rel. 
+ return null; + } + + final RelNode newJoin = HiveJoin.getJoin(rel.getCluster(), leftFrame.r, rightFrame.r, decorrelateExpr(rel.getCondition()), rel.getJoinType() ); + + // Create the mapping between the output of the old correlation rel + // and the new join rel + Map mapOldToNewOutputPos = Maps.newHashMap(); + + int oldLeftFieldCount = oldLeft.getRowType().getFieldCount(); + int newLeftFieldCount = leftFrame.r.getRowType().getFieldCount(); + + int oldRightFieldCount = oldRight.getRowType().getFieldCount(); + assert rel.getRowType().getFieldCount() + == oldLeftFieldCount + oldRightFieldCount; + + // Left input positions are not changed. + mapOldToNewOutputPos.putAll(leftFrame.oldToNewOutputPos); + + // Right input positions are shifted by newLeftFieldCount. + for (int i = 0; i < oldRightFieldCount; i++) { + mapOldToNewOutputPos.put(i + oldLeftFieldCount, + rightFrame.oldToNewOutputPos.get(i) + newLeftFieldCount); + } + + final SortedMap mapCorVarToOutputPos = + new TreeMap<>(leftFrame.corVarOutputPos); + + // Right input positions are shifted by newLeftFieldCount. + for (Map.Entry entry + : rightFrame.corVarOutputPos.entrySet()) { + mapCorVarToOutputPos.put(entry.getKey(), + entry.getValue() + newLeftFieldCount); + } + return register(rel, newJoin, mapOldToNewOutputPos, mapCorVarToOutputPos); + } + /** + * Rewrite LogicalJoin. + * + * @param rel LogicalJoin + */ + public Frame decorrelateRel(LogicalJoin rel) { + // + // Rewrite logic: + // + // 1. rewrite join condition. + // 2. map output positions and produce cor vars if any. + // + + final RelNode oldLeft = rel.getInput(0); + final RelNode oldRight = rel.getInput(1); + + final Frame leftFrame = getInvoke(oldLeft, rel); + final Frame rightFrame = getInvoke(oldRight, rel); + + if (leftFrame == null || rightFrame == null) { + // If any input has not been rewritten, do not rewrite this rel. 
+ return null; + } + + final RelNode newJoin = + LogicalJoin.create(leftFrame.r, rightFrame.r, + decorrelateExpr(rel.getCondition()), + ImmutableSet.of(), rel.getJoinType()); + + // Create the mapping between the output of the old correlation rel + // and the new join rel + Map mapOldToNewOutputPos = Maps.newHashMap(); + + int oldLeftFieldCount = oldLeft.getRowType().getFieldCount(); + int newLeftFieldCount = leftFrame.r.getRowType().getFieldCount(); + + int oldRightFieldCount = oldRight.getRowType().getFieldCount(); + assert rel.getRowType().getFieldCount() + == oldLeftFieldCount + oldRightFieldCount; + + // Left input positions are not changed. + mapOldToNewOutputPos.putAll(leftFrame.oldToNewOutputPos); + + // Right input positions are shifted by newLeftFieldCount. + for (int i = 0; i < oldRightFieldCount; i++) { + mapOldToNewOutputPos.put(i + oldLeftFieldCount, + rightFrame.oldToNewOutputPos.get(i) + newLeftFieldCount); + } + + final SortedMap mapCorVarToOutputPos = + new TreeMap<>(leftFrame.corVarOutputPos); + + // Right input positions are shifted by newLeftFieldCount. 
+ for (Map.Entry entry + : rightFrame.corVarOutputPos.entrySet()) { + mapCorVarToOutputPos.put(entry.getKey(), + entry.getValue() + newLeftFieldCount); + } + return register(rel, newJoin, mapOldToNewOutputPos, mapCorVarToOutputPos); + } + + private RexInputRef getNewForOldInputRef(RexInputRef oldInputRef) { + assert currentRel != null; + + int oldOrdinal = oldInputRef.getIndex(); + int newOrdinal = 0; + + // determine which input rel oldOrdinal references, and adjust + // oldOrdinal to be relative to that input rel + RelNode oldInput = null; + + for (RelNode oldInput0 : currentRel.getInputs()) { + RelDataType oldInputType = oldInput0.getRowType(); + int n = oldInputType.getFieldCount(); + if (oldOrdinal < n) { + oldInput = oldInput0; + break; + } + RelNode newInput = map.get(oldInput0).r; + newOrdinal += newInput.getRowType().getFieldCount(); + oldOrdinal -= n; + } + + assert oldInput != null; + + final Frame frame = map.get(oldInput); + assert frame != null; + + // now oldOrdinal is relative to oldInput + int oldLocalOrdinal = oldOrdinal; + + // figure out the newLocalOrdinal, relative to the newInput. + int newLocalOrdinal = oldLocalOrdinal; + + if (!frame.oldToNewOutputPos.isEmpty()) { + newLocalOrdinal = frame.oldToNewOutputPos.get(oldLocalOrdinal); + } + + newOrdinal += newLocalOrdinal; + + return new RexInputRef(newOrdinal, + frame.r.getRowType().getFieldList().get(newLocalOrdinal).getType()); + } + + /** + * Pulls project above the join from its RHS input. Enforces nullability + * for join output. 
+   *
+   * @param join             Join
+   * @param project          Original project as the right-hand input of the join
+   * @param nullIndicatorPos Position of null indicator
+   * @return the subtree with the new LogicalProject at the root
+   */
+  private RelNode projectJoinOutputWithNullability(
+      LogicalJoin join,
+      LogicalProject project,
+      int nullIndicatorPos) {
+    final RelDataTypeFactory typeFactory = join.getCluster().getTypeFactory();
+    final RelNode left = join.getLeft();
+    final JoinRelType joinType = join.getJoinType();
+
+    // Reference to the null-indicator column of the join output, typed as the
+    // nullable version of that column's type.
+    RexInputRef nullIndicator =
+        new RexInputRef(
+            nullIndicatorPos,
+            typeFactory.createTypeWithNullability(
+                join.getRowType().getFieldList().get(nullIndicatorPos)
+                    .getType(),
+                true));
+
+    // now create the new project
+    // NOTE(review): "List>" is not valid Java; the generic type arguments
+    // were presumably lost when this text was extracted -- confirm against
+    // the original patch.
+    List> newProjExprs = Lists.newArrayList();
+
+    // project everything from the LHS and then those from the original
+    // projRel
+    List leftInputFields =
+        left.getRowType().getFieldList();
+
+    for (int i = 0; i < leftInputFields.size(); i++) {
+      newProjExprs.add(RexInputRef.of2(i, leftInputFields));
+    }
+
+    // Marked where the projected expr is coming from so that the types will
+    // become nullable for the original projections which are now coming out
+    // of the nullable side of the OJ.
+    boolean projectPulledAboveLeftCorrelator =
+        joinType.generatesNullsOnRight();
+
+    // Rewrite each original projection; its second pair component
+    // (pair.right) is carried over unchanged next to the new expression.
+    for (Pair pair : project.getNamedProjects()) {
+      RexNode newProjExpr =
+          removeCorrelationExpr(
+              pair.left,
+              projectPulledAboveLeftCorrelator,
+              nullIndicator);
+
+      newProjExprs.add(Pair.of(newProjExpr, pair.right));
+    }
+
+    return RelOptUtil.createProject(join, newProjExprs, false);
+  }
+
+  /**
+   * Pulls a {@link Project} above a {@link Correlate} from its RHS input.
+   * Enforces nullability for join output.
+   *
+   * @param correlate Correlate
+   * @param project   the original project as the RHS input of the join
+   * @param isCount   Positions which are calls to the <code>COUNT</code>
+   *                  aggregation function
+   * @return the subtree with the new LogicalProject at the root
+   */
+  private RelNode aggregateCorrelatorOutput(
+      Correlate correlate,
+      LogicalProject project,
+      Set isCount) {
+    final RelNode left = correlate.getLeft();
+    final JoinRelType joinType = correlate.getJoinType().toJoinType();
+
+    // now create the new project
+    // NOTE(review): "List>" is not valid Java; the generic type arguments
+    // were presumably lost when this text was extracted -- confirm against
+    // the original patch.
+    final List> newProjects = Lists.newArrayList();
+
+    // Project everything from the LHS and then those from the original
+    // project
+    final List leftInputFields =
+        left.getRowType().getFieldList();
+
+    for (int i = 0; i < leftInputFields.size(); i++) {
+      newProjects.add(RexInputRef.of2(i, leftInputFields));
+    }
+
+    // Marked where the projected expr is coming from so that the types will
+    // become nullable for the original projections which are now coming out
+    // of the nullable side of the OJ.
+    boolean projectPulledAboveLeftCorrelator =
+        joinType.generatesNullsOnRight();
+
+    // Rewrite each original projection, passing the COUNT positions instead
+    // of a null indicator; pair.right is carried over unchanged.
+    for (Pair pair : project.getNamedProjects()) {
+      RexNode newProjExpr =
+          removeCorrelationExpr(
+              pair.left,
+              projectPulledAboveLeftCorrelator,
+              isCount);
+      newProjects.add(Pair.of(newProjExpr, pair.right));
+    }
+
+    return RelOptUtil.createProject(correlate, newProjects, false);
+  }
+
+  /**
+   * Checks whether the correlations in projRel and filter are related to
+   * the correlated variables provided by corRel.
+   *
+   * @param correlate          Correlate
+   * @param project            The original Project as the RHS input of the join
+   * @param filter             Filter
+   * @param correlatedJoinKeys Correlated join keys
+   * @return true if filter and proj only references corVar provided by corRel
+   */
+  private boolean checkCorVars(
+      LogicalCorrelate correlate,
+      LogicalProject project,
+      LogicalFilter filter,
+      List correlatedJoinKeys) {
+    if (filter != null) {
+      assert correlatedJoinKeys != null;
+
+      // check that all correlated refs in the filter condition are
+      // used in the join(as field access).
+      Set corVarInFilter =
+          Sets.newHashSet(cm.mapRefRelToCorVar.get(filter));
+
+      for (RexFieldAccess correlatedJoinKey : correlatedJoinKeys) {
+        corVarInFilter.remove(cm.mapFieldAccessToCorVar.get(correlatedJoinKey));
+      }
+
+      // Anything left over is a correlated variable of the filter that no
+      // join key accounts for.
+      if (!corVarInFilter.isEmpty()) {
+        return false;
+      }
+
+      // Check that the correlated variables referenced in these
+      // comparisons do come from the correlatorRel.
+      // (The set is empty at this point, so this re-adds the filter's full
+      // set of correlated variables.)
+      corVarInFilter.addAll(cm.mapRefRelToCorVar.get(filter));
+
+      for (Correlation corVar : corVarInFilter) {
+        // identity comparison: must be this very correlate instance
+        if (cm.mapCorVarToCorRel.get(corVar.corr) != correlate) {
+          return false;
+        }
+      }
+    }
+
+    // if project has any correlated reference, make sure they are also
+    // provided by the current correlate. They will be projected out of the LHS
+    // of the correlate.
+    if ((project != null) && cm.mapRefRelToCorVar.containsKey(project)) {
+      for (Correlation corVar : cm.mapRefRelToCorVar.get(project)) {
+        if (cm.mapCorVarToCorRel.get(corVar.corr) != correlate) {
+          return false;
+        }
+      }
+    }
+
+    return true;
+  }
+
+  /**
+   * Removes the {@code cm.mapCorVarToCorRel} entry for the correlation id of
+   * the given correlate, but only when that entry currently maps to this very
+   * correlate instance (identity comparison). Otherwise nothing is changed.
+   *
+   * @param correlate Correlator
+   */
+  private void removeCorVarFromTree(LogicalCorrelate correlate) {
+    if (cm.mapCorVarToCorRel.get(correlate.getCorrelationId()) == correlate) {
+      cm.mapCorVarToCorRel.remove(correlate.getCorrelationId());
+    }
+  }
+
+  /**
+   * Projects all {@code input} output fields plus the additional expressions.
+   *
+   * @param input           Input relational expression
+   * @param additionalExprs Additional expressions and names
+   * @return the new LogicalProject
+   */
+  private RelNode createProjectWithAdditionalExprs(
+      RelNode input,
+      List> additionalExprs) {
+    // NOTE(review): "List>" is not valid Java; the generic type arguments
+    // were presumably lost when this text was extracted -- confirm against
+    // the original patch.
+    final List fieldList =
+        input.getRowType().getFieldList();
+    List> projects = Lists.newArrayList();
+    // One input reference per existing field, keeping the field's name.
+    for (Ord field : Ord.zip(fieldList)) {
+      projects.add(
+          Pair.of(
+              (RexNode) rexBuilder.makeInputRef(
+                  field.e.getType(), field.i),
+              field.e.getName()));
+    }
+    // The additional expressions go after all pass-through fields.
+    projects.addAll(additionalExprs);
+    return RelOptUtil.createProject(input, projects, false);
+  }
+
+  /**
+   * Returns an immutable map with the identity [0: 0, .., count-1: count-1].
+   *
+   * @param count number of entries; the loop adds nothing when it is not
+   *              positive
+   * @return the map built by the {@code ImmutableMap} builder
+   */
+  static Map identityMap(int count) {
+    ImmutableMap.Builder builder = ImmutableMap.builder();
+    for (int i = 0; i < count; i++) {
+      builder.put(i, i);
+    }
+    return builder.build();
+  }
+
+  /**
+   * Registers a relational expression and the relational expression it became
+   * after decorrelation.
+   *
+   * <p>Asserts that every key of {@code oldToNewOutputPos} is below the field
+   * count of {@code newRel}, wraps the arguments in a new {@code Frame} and
+   * stores it in {@code map} under {@code rel}.
+   *
+   * @param rel               original relational expression (the map key)
+   * @param newRel            relational expression it was rewritten to
+   * @param oldToNewOutputPos output position mapping stored in the frame
+   * @param corVarToOutputPos correlated-variable positions stored in the frame
+   * @return the frame that was stored
+   */
+  Frame register(RelNode rel, RelNode newRel,
+      Map oldToNewOutputPos,
+      SortedMap corVarToOutputPos) {
+    assert allLessThan(oldToNewOutputPos.keySet(),
+        newRel.getRowType().getFieldCount(), Litmus.THROW);
+    final Frame frame = new Frame(newRel, corVarToOutputPos, oldToNewOutputPos);
+    map.put(rel, frame);
+    return frame;
+  }
+
+  /**
+   * Checks that every value in {@code integers} is strictly less than
+   * {@code limit}.
+   *
+   * @param integers values to check
+   * @param limit    exclusive upper bound
+   * @param ret      receives the outcome: {@code fail} is called with a message
+   *                 for the first value that is not below the limit, otherwise
+   *                 {@code succeed} is called
+   * @return whatever the invoked {@code ret} method returns
+   */
+  static boolean allLessThan(Collection integers, int limit,
+      Litmus ret) {
+    for (int value : integers) {
+      if (value >= limit) {
+        return ret.fail("out of range; value: " + value + ", limit: " + limit);
+      }
+    }
+    return ret.succeed();
+  }
+
+  /**
+   * Unwraps a {@code HepRelVertex}: returns its current rel when {@code rel}
+   * is a vertex, otherwise returns {@code rel} unchanged.
+   */
+  private static RelNode stripHep(RelNode rel) {
+    if (rel instanceof HepRelVertex) {
+      HepRelVertex hepRelVertex = (HepRelVertex) rel;
+      rel = hepRelVertex.getCurrentRel();
+    }
+    return rel;
+  }
+
+  //~ Inner Classes ----------------------------------------------------------
+
+  /** Shuttle that decorrelates.
*/
+  private class DecorrelateRexShuttle extends RexShuttle {
+    // Replaces a correlated field access with a reference to the column of
+    // the rewritten inputs of currentRel that carries that correlated
+    // variable. The access is returned unchanged when no input provides it.
+    @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) {
+      // Running width of the inputs visited so far, used to turn a position
+      // local to one input into a position across all inputs.
+      int newInputOutputOffset = 0;
+      for (RelNode input : currentRel.getInputs()) {
+        final Frame frame = map.get(input);
+
+        if (frame != null) {
+          // try to find in this input rel the position of cor var
+          final Correlation corVar = cm.mapFieldAccessToCorVar.get(fieldAccess);
+
+          if (corVar != null) {
+            Integer newInputPos = frame.corVarOutputPos.get(corVar);
+            if (newInputPos != null) {
+              // This input rel does produce the cor var referenced.
+              // Assume fieldAccess has the correct type info.
+              return new RexInputRef(newInputPos + newInputOutputOffset,
+                  fieldAccess.getType());
+            }
+          }
+
+          // this input rel does not produce the cor var needed
+          newInputOutputOffset += frame.r.getRowType().getFieldCount();
+        } else {
+          // this input rel is not rewritten
+          newInputOutputOffset += input.getRowType().getFieldCount();
+        }
+      }
+      return fieldAccess;
+    }
+
+    // Input references are remapped from the old inputs of currentRel to the
+    // rewritten ones by getNewForOldInputRef.
+    @Override public RexNode visitInputRef(RexInputRef inputRef) {
+      return getNewForOldInputRef(inputRef);
+    }
+  }
+
+  /** Shuttle that removes correlations.
*/
+  private class RemoveCorrelationRexShuttle extends RexShuttle {
+    final RexBuilder rexBuilder;
+    final RelDataTypeFactory typeFactory;
+    final boolean projectPulledAboveLeftCorrelator;
+    final RexInputRef nullIndicator;
+    final ImmutableSet isCount;
+
+    public RemoveCorrelationRexShuttle(
+        RexBuilder rexBuilder,
+        boolean projectPulledAboveLeftCorrelator,
+        RexInputRef nullIndicator,
+        Set isCount) {
+      this.projectPulledAboveLeftCorrelator =
+          projectPulledAboveLeftCorrelator;
+      this.nullIndicator = nullIndicator; // may be null
+      // NOTE(review): isCount is copied unconditionally, so the field is never
+      // null afterwards and the "isCount != null" test in visitInputRef looks
+      // redundant -- confirm callers never pass null.
+      this.isCount = ImmutableSet.copyOf(isCount);
+      this.rexBuilder = rexBuilder;
+      this.typeFactory = rexBuilder.getTypeFactory();
+    }
+
+    // Builds: CASE WHEN nullInputRef IS NULL THEN CAST(lit) ELSE CAST(rexNode)
+    // END, with both casts targeting the nullable version of rexNode's type.
+    private RexNode createCaseExpression(
+        RexInputRef nullInputRef,
+        RexLiteral lit,
+        RexNode rexNode) {
+      RexNode[] caseOperands = new RexNode[3];
+
+      // Construct a CASE expression to handle the null indicator.
+      //
+      // This also covers the case where a left correlated subquery
+      // projects fields from outer relation. Since LOJ cannot produce
+      // nulls on the LHS, the projection now needs to make a nullable LHS
+      // reference using a nullability indicator. If this indicator
+      // is null, it means the subquery does not produce any value. As a
+      // result, any RHS ref by this subquery needs to produce null value.
+
+      // WHEN indicator IS NULL
+      caseOperands[0] =
+          rexBuilder.makeCall(
+              SqlStdOperatorTable.IS_NULL,
+              new RexInputRef(
+                  nullInputRef.getIndex(),
+                  typeFactory.createTypeWithNullability(
+                      nullInputRef.getType(),
+                      true)));
+
+      // THEN CAST(NULL AS newInputTypeNullable)
+      // (more precisely CAST(lit ...): callers also pass a zero literal)
+      caseOperands[1] =
+          rexBuilder.makeCast(
+              typeFactory.createTypeWithNullability(
+                  rexNode.getType(),
+                  true),
+              lit);
+
+      // ELSE cast (newInput AS newInputTypeNullable) END
+      caseOperands[2] =
+          rexBuilder.makeCast(
+              typeFactory.createTypeWithNullability(
+                  rexNode.getType(),
+                  true),
+              rexNode);
+
+      return rexBuilder.makeCall(
+          SqlStdOperatorTable.CASE,
+          caseOperands);
+    }
+
+    @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) {
+      if (cm.mapFieldAccessToCorVar.containsKey(fieldAccess)) {
+        // if it is a corVar, change it to be input ref.
+        Correlation corVar = cm.mapFieldAccessToCorVar.get(fieldAccess);
+
+        // corVar offset should point to the leftInput of currentRel,
+        // which is the Correlator.
+        RexNode newRexNode =
+            new RexInputRef(corVar.field, fieldAccess.getType());
+
+        if (projectPulledAboveLeftCorrelator
+            && (nullIndicator != null)) {
+          // need to enforce nullability by wrapping the transformed
+          // expression in a CASE on the null indicator.
+          newRexNode =
+              createCaseExpression(
+                  nullIndicator,
+                  rexBuilder.constantNull(),
+                  newRexNode);
+        }
+        return newRexNode;
+      }
+      return fieldAccess;
+    }
+
+    @Override public RexNode visitInputRef(RexInputRef inputRef) {
+      if (currentRel instanceof LogicalCorrelate) {
+        // if this rel references corVar
+        // and now it needs to be rewritten
+        // it must have been pulled above the Correlator
+        // replace the input ref to account for the LHS of the
+        // Correlator
+        final int leftInputFieldCount =
+            ((LogicalCorrelate) currentRel).getLeft().getRowType()
+                .getFieldCount();
+        RelDataType newType = inputRef.getType();
+
+        if (projectPulledAboveLeftCorrelator) {
+          newType =
+              typeFactory.createTypeWithNullability(newType, true);
+        }
+
+        int pos = inputRef.getIndex();
+        RexInputRef newInputRef =
+            new RexInputRef(leftInputFieldCount + pos, newType);
+
+        // A COUNT position yields 0 instead of NULL when the shifted
+        // reference itself is null.
+        if ((isCount != null) && isCount.contains(pos)) {
+          return createCaseExpression(
+              newInputRef,
+              rexBuilder.makeExactLiteral(BigDecimal.ZERO),
+              newInputRef);
+        } else {
+          return newInputRef;
+        }
+      }
+      return inputRef;
+    }
+
+    @Override public RexNode visitLiteral(RexLiteral literal) {
+      // Use nullIndicator to decide whether to project null.
+      // Do nothing if the literal is null.
+      if (!RexUtil.isNull(literal)
+          && projectPulledAboveLeftCorrelator
+          && (nullIndicator != null)) {
+        return createCaseExpression(
+            nullIndicator,
+            rexBuilder.constantNull(),
+            literal);
+      }
+      return literal;
+    }
+
+    @Override public RexNode visitCall(final RexCall call) {
+      RexNode newCall;
+
+      // update[0] is set by visitList when any operand was rewritten.
+      boolean[] update = {false};
+      List clonedOperands = visitList(call.operands, update);
+      if (update[0]) {
+        SqlOperator operator = call.getOperator();
+
+        // A CAST call with fewer than two operands keeps its existing return
+        // type instead of having it re-derived.
+        boolean isSpecialCast = false;
+        if (operator instanceof SqlFunction) {
+          SqlFunction function = (SqlFunction) operator;
+          if (function.getKind() == SqlKind.CAST) {
+            if (call.operands.size() < 2) {
+              isSpecialCast = true;
+            }
+          }
+        }
+
+        final RelDataType newType;
+        if (!isSpecialCast) {
+          // TODO: ideally this only needs to be called if the result
+          // type will also change. However, since that requires
+          // support from type inference rules to tell whether a rule
+          // decides return type based on input types, for now all
+          // operators will be recreated with new type if any operand
+          // changed, unless the operator has "built-in" type.
+          newType = rexBuilder.deriveReturnType(operator, clonedOperands);
+        } else {
+          // Use the current return type when creating a new call, for
+          // operators with return type built into the operator
+          // definition, and with no type inference rules, such as
+          // cast function with less than 2 operands.
+
+          // TODO: Comments in RexShuttle.visitCall() mention other
+          // types in this category. Need to resolve those together
+          // and preferably in the base class RexShuttle.
+          newType = call.getType();
+        }
+        newCall =
+            rexBuilder.makeCall(
+                newType,
+                operator,
+                clonedOperands);
+      } else {
+        newCall = call;
+      }
+
+      // The whole call is guarded by the null indicator as well.
+      if (projectPulledAboveLeftCorrelator && (nullIndicator != null)) {
+        return createCaseExpression(
+            nullIndicator,
+            rexBuilder.constantNull(),
+            newCall);
+      }
+      return newCall;
+    }
+  }
+
+  /**
+   * Rule to remove single_value rel. For cases like
+   *
+   *

AggRel single_value proj/filter/agg/ join on unique LHS key + * AggRel single group
+   */
+  private final class RemoveSingleAggregateRule extends RelOptRule {
+    // Matches LogicalAggregate <- LogicalProject <- LogicalAggregate.
+    public RemoveSingleAggregateRule() {
+      super(
+          operand(
+              LogicalAggregate.class,
+              operand(
+                  LogicalProject.class,
+                  operand(LogicalAggregate.class, any()))));
+    }
+
+    public void onMatch(RelOptRuleCall call) {
+      LogicalAggregate singleAggregate = call.rel(0);
+      LogicalProject project = call.rel(1);
+      LogicalAggregate aggregate = call.rel(2);
+
+      // check singleAggRel is single_value agg
+      // (no group keys, exactly one call, and that call is SINGLE_VALUE)
+      if ((!singleAggregate.getGroupSet().isEmpty())
+          || (singleAggregate.getAggCallList().size() != 1)
+          || !(singleAggregate.getAggCallList().get(0).getAggregation()
+          instanceof SqlSingleValueAggFunction)) {
+        return;
+      }
+
+      // check this project only projects one expression, i.e. scalar
+      // subqueries.
+      List projExprs = project.getProjects();
+      if (projExprs.size() != 1) {
+        return;
+      }
+
+      // check the input to projRel is an aggregate on the entire input
+      if (!aggregate.getGroupSet().isEmpty()) {
+        return;
+      }
+
+      // singleAggRel produces a nullable type, so create the new
+      // projection that casts proj expr to a nullable type.
+      // The new project sits directly on the inner aggregate, so the outer
+      // single_value aggregate and the original project are both replaced.
+      final RelOptCluster cluster = project.getCluster();
+      RelNode newProject =
+          RelOptUtil.createProject(aggregate,
+              ImmutableList.of(
+                  rexBuilder.makeCast(
+                      cluster.getTypeFactory().createTypeWithNullability(
+                          projExprs.get(0).getType(),
+                          true),
+                      projExprs.get(0))),
+              null);
+      call.transformTo(newProject);
+    }
+  }
+
+  /** Planner rule that removes correlations for scalar projects.
*/
+  private final class RemoveCorrelationForScalarProjectRule extends RelOptRule {
+    public RemoveCorrelationForScalarProjectRule() {
+      super(
+          operand(LogicalCorrelate.class,
+              operand(RelNode.class, any()),
+              operand(LogicalAggregate.class,
+                  operand(LogicalProject.class,
+                      operand(RelNode.class, any())))));
+    }
+
+    public void onMatch(RelOptRuleCall call) {
+      final LogicalCorrelate correlate = call.rel(0);
+      final RelNode left = call.rel(1);
+      final LogicalAggregate aggregate = call.rel(2);
+      final LogicalProject project = call.rel(3);
+      RelNode right = call.rel(4);
+      final RelOptCluster cluster = correlate.getCluster();
+
+      setCurrent(call.getPlanner().getRoot(), correlate);
+
+      // Check for this pattern.
+      // The pattern matching could be simplified if rules can be applied
+      // during decorrelation.
+      //
+      // CorrelateRel(left correlation, condition = true)
+      //   LeftInputRel
+      //   LogicalAggregate (groupby (0) single_value())
+      //     LogicalProject-A (may reference coVar)
+      //       RightInputRel
+      final JoinRelType joinType = correlate.getJoinType().toJoinType();
+
+      // corRel.getCondition was here, however Correlate was updated so it
+      // never includes a join condition. The code was not modified for brevity.
+      // NOTE(review): the joinCond comparison below is a reference comparison
+      // of two makeLiteral(true) results.
+      RexNode joinCond = rexBuilder.makeLiteral(true);
+      if ((joinType != JoinRelType.LEFT)
+          || (joinCond != rexBuilder.makeLiteral(true))) {
+        return;
+      }
+
+      // check that the agg is of the following type:
+      // doing a single_value() on the entire input
+      if ((!aggregate.getGroupSet().isEmpty())
+          || (aggregate.getAggCallList().size() != 1)
+          || !(aggregate.getAggCallList().get(0).getAggregation()
+          instanceof SqlSingleValueAggFunction)) {
+        return;
+      }
+
+      // check this project only projects one expression, i.e. scalar
+      // subqueries.
+      if (project.getProjects().size() != 1) {
+        return;
+      }
+
+      int nullIndicatorPos;
+
+      if ((right instanceof LogicalFilter)
+          && cm.mapRefRelToCorVar.containsKey(right)) {
+        // rightInputRel has this shape:
+        //
+        //       LogicalFilter (references corvar)
+        //         FilterInputRel
+
+        // If rightInputRel is a filter and contains correlated
+        // reference, make sure the correlated keys in the filter
+        // condition forms a unique key of the RHS.
+
+        LogicalFilter filter = (LogicalFilter) right;
+        right = filter.getInput();
+
+        assert right instanceof HepRelVertex;
+        right = ((HepRelVertex) right).getCurrentRel();
+
+        // check filter input contains no correlation
+        if (RelOptUtil.getVariablesUsed(right).size() > 0) {
+          return;
+        }
+
+        // extract the correlation out of the filter
+
+        // First breaking up the filter conditions into equality
+        // comparisons between rightJoinKeys(from the original
+        // filterInputRel) and correlatedJoinKeys. correlatedJoinKeys
+        // can be expressions, while rightJoinKeys need to be input
+        // refs. These comparisons are AND'ed together.
+        List tmpRightJoinKeys = Lists.newArrayList();
+        List correlatedJoinKeys = Lists.newArrayList();
+        RelOptUtil.splitCorrelatedFilterCondition(
+            filter,
+            tmpRightJoinKeys,
+            correlatedJoinKeys,
+            false);
+
+        // check that the columns referenced in these comparisons form
+        // an unique key of the filterInputRel
+        final List rightJoinKeys = new ArrayList<>();
+        for (RexNode key : tmpRightJoinKeys) {
+          assert key instanceof RexInputRef;
+          rightJoinKeys.add((RexInputRef) key);
+        }
+
+        // check that the columns referenced in rightJoinKeys form an
+        // unique key of the filterInputRel
+        if (rightJoinKeys.isEmpty()) {
+          return;
+        }
+
+        // The join filters out the nulls.  So, it's ok if there are
+        // nulls in the join keys.
+        final RelMetadataQuery mq = RelMetadataQuery.instance();
+        if (!RelMdUtil.areColumnsDefinitelyUniqueWhenNullsFiltered(mq, right,
+            rightJoinKeys)) {
+          //SQL2REL_LOGGER.fine(rightJoinKeys.toString()
+          //    + "are not unique keys for "
+          //    + right.toString());
+          return;
+        }
+
+        // Collect the field accesses used by the correlated side of the
+        // comparisons so that checkCorVars can validate them.
+        RexUtil.FieldAccessFinder visitor =
+            new RexUtil.FieldAccessFinder();
+        RexUtil.apply(visitor, correlatedJoinKeys, null);
+        List correlatedKeyList =
+            visitor.getFieldAccessList();
+
+        if (!checkCorVars(correlate, project, filter, correlatedKeyList)) {
+          return;
+        }
+
+        // Change the plan to this structure.
+        // Note that the aggregateRel is removed.
+        //
+        // LogicalProject-A' (replace corvar to input ref from the LogicalJoin)
+        //   LogicalJoin (replace corvar to input ref from LeftInputRel)
+        //     LeftInputRel
+        //     RightInputRel(previously FilterInputRel)
+
+        // Change the filter condition into a join condition
+        joinCond =
+            removeCorrelationExpr(filter.getCondition(), false);
+
+        // The first right-hand join key doubles as the null indicator; its
+        // position is shifted past the left input's fields.
+        nullIndicatorPos =
+            left.getRowType().getFieldCount()
+                + rightJoinKeys.get(0).getIndex();
+      } else if (cm.mapRefRelToCorVar.containsKey(project)) {
+        // check the project input contains no correlation
+        if (RelOptUtil.getVariablesUsed(right).size() > 0) {
+          return;
+        }
+
+        if (!checkCorVars(correlate, project, null, null)) {
+          return;
+        }
+
+        // Change the plan to this structure.
+        //
+        // LogicalProject-A' (replace corvar to input ref from LogicalJoin)
+        //   LogicalJoin (left, condition = true)
+        //     LeftInputRel
+        //     LogicalAggregate(groupby(0), single_value(0), s_v(1)....)
+        //       LogicalProject-B (everything from input plus literal true)
+        //         ProjInputRel
+
+        // make the new projRel to provide a null indicator
+        right =
+            createProjectWithAdditionalExprs(right,
+                ImmutableList.of(
+                    Pair.of(
+                        rexBuilder.makeLiteral(true), "nullIndicator")));
+
+        // make the new aggRel
+        right =
+            RelOptUtil.createSingleValueAggRel(cluster, right);
+
+        // The last field:
+        //     single_value(true)
+        // is the nullIndicator
+        nullIndicatorPos =
+            left.getRowType().getFieldCount()
+                + right.getRowType().getFieldCount() - 1;
+      } else {
+        return;
+      }
+
+      // make the new join rel
+      LogicalJoin join =
+          LogicalJoin.create(left, right, joinCond,
+              ImmutableSet.of(), joinType);
+
+      RelNode newProject =
+          projectJoinOutputWithNullability(join, project, nullIndicatorPos);
+
+      call.transformTo(newProject);
+
+      removeCorVarFromTree(correlate);
+    }
+  }
+
+  /** Planner rule that removes correlations for scalar aggregates. */
+  private final class RemoveCorrelationForScalarAggregateRule
+      extends RelOptRule {
+    public RemoveCorrelationForScalarAggregateRule() {
+      super(
+          operand(LogicalCorrelate.class,
+              operand(RelNode.class, any()),
+              operand(LogicalProject.class,
+                  operand(LogicalAggregate.class, null, Aggregate.IS_SIMPLE,
+                      operand(LogicalProject.class,
+                          operand(RelNode.class, any()))))));
+    }
+
+    public void onMatch(RelOptRuleCall call) {
+      final LogicalCorrelate correlate = call.rel(0);
+      final RelNode left = call.rel(1);
+      final LogicalProject aggOutputProject = call.rel(2);
+      final LogicalAggregate aggregate = call.rel(3);
+      final LogicalProject aggInputProject = call.rel(4);
+      RelNode right = call.rel(5);
+      final RelOptCluster cluster = correlate.getCluster();
+
+      setCurrent(call.getPlanner().getRoot(), correlate);
+
+      // check for this pattern
+      // The pattern matching could be simplified if rules can be applied
+      // during decorrelation,
+      //
+      // CorrelateRel(left correlation, condition = true)
+      //   LeftInputRel
+      //   LogicalProject-A (a RexNode)
+      //     LogicalAggregate (groupby (0), agg0(), agg1()...)
+      //       LogicalProject-B (references coVar)
+      //         rightInputRel
+
+      // check aggOutputProject projects only one expression
+      final List aggOutputProjects = aggOutputProject.getProjects();
+      if (aggOutputProjects.size() != 1) {
+        return;
+      }
+
+      final JoinRelType joinType = correlate.getJoinType().toJoinType();
+      // corRel.getCondition was here, however Correlate was updated so it
+      // never includes a join condition. The code was not modified for brevity.
+      RexNode joinCond = rexBuilder.makeLiteral(true);
+      if ((joinType != JoinRelType.LEFT)
+          || (joinCond != rexBuilder.makeLiteral(true))) {
+        return;
+      }
+
+      // check that the agg is on the entire input
+      if (!aggregate.getGroupSet().isEmpty()) {
+        return;
+      }
+
+      final List aggInputProjects = aggInputProject.getProjects();
+
+      final List aggCalls = aggregate.getAggCallList();
+      final Set isCountStar = Sets.newHashSet();
+
+      // mark if agg produces count(*) which needs to reference the
+      // nullIndicator after the transformation.
+      int k = -1;
+      for (AggregateCall aggCall : aggCalls) {
+        ++k;
+        if ((aggCall.getAggregation() instanceof SqlCountAggFunction)
+            && (aggCall.getArgList().size() == 0)) {
+          isCountStar.add(k);
+        }
+      }
+
+      if ((right instanceof LogicalFilter)
+          && cm.mapRefRelToCorVar.containsKey(right)) {
+        // rightInputRel has this shape:
+        //
+        //       LogicalFilter (references corvar)
+        //         FilterInputRel
+        LogicalFilter filter = (LogicalFilter) right;
+        right = filter.getInput();
+
+        assert right instanceof HepRelVertex;
+        right = ((HepRelVertex) right).getCurrentRel();
+
+        // check filter input contains no correlation
+        if (RelOptUtil.getVariablesUsed(right).size() > 0) {
+          return;
+        }
+
+        // check filter condition type. First extract the correlation out
+        // of the filter
+
+        // First breaking up the filter conditions into equality
+        // comparisons between rightJoinKeys(from the original
+        // filterInputRel) and correlatedJoinKeys. correlatedJoinKeys
+        // can only be RexFieldAccess, while rightJoinKeys can be
+        // expressions. These comparisons are AND'ed together.
+        List rightJoinKeys = Lists.newArrayList();
+        List tmpCorrelatedJoinKeys = Lists.newArrayList();
+        RelOptUtil.splitCorrelatedFilterCondition(
+            filter,
+            rightJoinKeys,
+            tmpCorrelatedJoinKeys,
+            true);
+
+        // make sure the correlated reference forms a unique key: check
+        // that the columns referenced in these comparisons form an
+        // unique key of the leftInputRel
+        List correlatedJoinKeys = Lists.newArrayList();
+        List correlatedInputRefJoinKeys = Lists.newArrayList();
+        for (RexNode joinKey : tmpCorrelatedJoinKeys) {
+          assert joinKey instanceof RexFieldAccess;
+          correlatedJoinKeys.add((RexFieldAccess) joinKey);
+          RexNode correlatedInputRef =
+              removeCorrelationExpr(joinKey, false);
+          assert correlatedInputRef instanceof RexInputRef;
+          correlatedInputRefJoinKeys.add(
+              (RexInputRef) correlatedInputRef);
+        }
+
+        // check that the columns referenced in correlatedInputRefJoinKeys
+        // form an unique key of the leftInputRel
+        if (correlatedInputRefJoinKeys.isEmpty()) {
+          return;
+        }
+
+        // The join filters out the nulls.  So, it's ok if there are
+        // nulls in the join keys.
+        final RelMetadataQuery mq = RelMetadataQuery.instance();
+        if (!RelMdUtil.areColumnsDefinitelyUniqueWhenNullsFiltered(mq, left,
+            correlatedInputRefJoinKeys)) {
+          //SQL2REL_LOGGER.fine(correlatedJoinKeys.toString()
+          //    + "are not unique keys for "
+          //    + left.toString());
+          return;
+        }
+
+        // check cor var references are valid
+        if (!checkCorVars(correlate,
+            aggInputProject,
+            filter,
+            correlatedJoinKeys)) {
+          return;
+        }
+
+        // Rewrite the above plan:
+        //
+        // CorrelateRel(left correlation, condition = true)
+        //   LeftInputRel
+        //   LogicalProject-A (a RexNode)
+        //     LogicalAggregate (groupby(0), agg0(),agg1()...)
+        //       LogicalProject-B (may reference coVar)
+        //         LogicalFilter (references corVar)
+        //           RightInputRel (no correlated reference)
+        //
+
+        // to this plan:
+        //
+        // LogicalProject-A' (all gby keys + rewritten nullable ProjExpr)
+        //   LogicalAggregate (groupby(all left input refs)
+        //                 agg0(rewritten expression),
+        //                 agg1()...)
+        //     LogicalProject-B' (rewritten original projected exprs)
+        //       LogicalJoin(replace corvar w/ input ref from LeftInputRel)
+        //         LeftInputRel
+        //         RightInputRel
+        //
+
+        // In the case where agg is count(*) or count($corVar), it is
+        // changed to count(nullIndicator).
+        // Note:  any non-nullable field from the RHS can be used as
+        // the indicator however a "true" field is added to the
+        // projection list from the RHS for simplicity to avoid
+        // searching for non-null fields.
+        //
+        // LogicalProject-A' (all gby keys + rewritten nullable ProjExpr)
+        //   LogicalAggregate (groupby(all left input refs),
+        //                 count(nullIndicator), other aggs...)
+        //     LogicalProject-B' (all left input refs plus
+        //                    the rewritten original projected exprs)
+        //       LogicalJoin(replace corvar to input ref from LeftInputRel)
+        //         LeftInputRel
+        //         LogicalProject (everything from RightInputRel plus
+        //                     the nullIndicator "true")
+        //           RightInputRel
+        //
+
+        // first change the filter condition into a join condition
+        joinCond =
+            removeCorrelationExpr(filter.getCondition(), false);
+      } else if (cm.mapRefRelToCorVar.containsKey(aggInputProject)) {
+        // check rightInputRel contains no correlation
+        if (RelOptUtil.getVariablesUsed(right).size() > 0) {
+          return;
+        }
+
+        // check cor var references are valid
+        if (!checkCorVars(correlate, aggInputProject, null, null)) {
+          return;
+        }
+
+        int nFields = left.getRowType().getFieldCount();
+        ImmutableBitSet allCols = ImmutableBitSet.range(nFields);
+
+        // leftInputRel contains unique keys
+        // i.e. each row is distinct and can group by on all the left
+        // fields
+        final RelMetadataQuery mq = RelMetadataQuery.instance();
+        if (!RelMdUtil.areColumnsDefinitelyUnique(mq, left, allCols)) {
+          //SQL2REL_LOGGER.fine("There are no unique keys for " + left);
+          return;
+        }
+        //
+        // Rewrite the above plan:
+        //
+        // CorrelateRel(left correlation, condition = true)
+        //   LeftInputRel
+        //   LogicalProject-A (a RexNode)
+        //     LogicalAggregate (groupby(0), agg0(), agg1()...)
+        //       LogicalProject-B (references coVar)
+        //         RightInputRel (no correlated reference)
+        //
+
+        // to this plan:
+        //
+        // LogicalProject-A' (all gby keys + rewritten nullable ProjExpr)
+        //   LogicalAggregate (groupby(all left input refs)
+        //                 agg0(rewritten expression),
+        //                 agg1()...)
+        //     LogicalProject-B' (rewritten original projected exprs)
+        //       LogicalJoin (LOJ cond = true)
+        //         LeftInputRel
+        //         RightInputRel
+        //
+
+        // In the case where agg is count($corVar), it is changed to
+        // count(nullIndicator).
+        // Note:  any non-nullable field from the RHS can be used as
+        // the indicator however a "true" field is added to the
+        // projection list from the RHS for simplicity to avoid
+        // searching for non-null fields.
+        //
+        // LogicalProject-A' (all gby keys + rewritten nullable ProjExpr)
+        //   LogicalAggregate (groupby(all left input refs),
+        //                 count(nullIndicator), other aggs...)
+        //     LogicalProject-B' (all left input refs plus
+        //                    the rewritten original projected exprs)
+        //       LogicalJoin(replace corvar to input ref from LeftInputRel)
+        //         LeftInputRel
+        //         LogicalProject (everything from RightInputRel plus
+        //                     the nullIndicator "true")
+        //           RightInputRel
+      } else {
+        return;
+      }
+
+      RelDataType leftInputFieldType = left.getRowType();
+      int leftInputFieldCount = leftInputFieldType.getFieldCount();
+      // left fields + rewritten agg inputs + the trailing null indicator
+      int joinOutputProjExprCount =
+          leftInputFieldCount + aggInputProjects.size() + 1;
+
+      right =
+          createProjectWithAdditionalExprs(right,
+              ImmutableList.of(
+                  Pair.of(rexBuilder.makeLiteral(true),
+                      "nullIndicator")));
+
+      LogicalJoin join =
+          LogicalJoin.create(left, right, joinCond,
+              ImmutableSet.of(), joinType);
+
+      // To the consumer of joinOutputProjRel, nullIndicator is located
+      // at the end
+      int nullIndicatorPos = join.getRowType().getFieldCount() - 1;
+
+      RexInputRef nullIndicator =
+          new RexInputRef(
+              nullIndicatorPos,
+              cluster.getTypeFactory().createTypeWithNullability(
+                  join.getRowType().getFieldList()
+                      .get(nullIndicatorPos).getType(),
+                  true));
+
+      // first project all group-by keys plus the transformed agg input
+      List joinOutputProjects = Lists.newArrayList();
+
+      // LOJ Join preserves LHS types
+      for (int i = 0; i < leftInputFieldCount; i++) {
+        joinOutputProjects.add(
+            rexBuilder.makeInputRef(
+                leftInputFieldType.getFieldList().get(i).getType(), i));
+      }
+
+      for (RexNode aggInputProjExpr : aggInputProjects) {
+        joinOutputProjects.add(
+            removeCorrelationExpr(aggInputProjExpr,
+                joinType.generatesNullsOnRight(),
+                nullIndicator));
+      }
+
+      joinOutputProjects.add(
+          rexBuilder.makeInputRef(join, nullIndicatorPos));
+
+      RelNode joinOutputProject =
+          RelOptUtil.createProject(
+              join,
+              joinOutputProjects,
+              null);
+
+      // nullIndicator is now at a different location in the output of
+      // the join
+      nullIndicatorPos = joinOutputProjExprCount - 1;
+
+      final int groupCount = leftInputFieldCount;
+
+      List newAggCalls = Lists.newArrayList();
+      k = -1;
+      for (AggregateCall aggCall : aggCalls) {
+        ++k;
+        final List argList;
+
+        if (isCountStar.contains(k)) {
+          // this is a count(*), transform it to count(nullIndicator)
+          // the null indicator is located at the end
+          argList = Collections.singletonList(nullIndicatorPos);
+        } else {
+          argList = Lists.newArrayList();
+
+          // Arguments now follow the group-by columns (all left fields).
+          for (int aggArg : aggCall.getArgList()) {
+            argList.add(aggArg + groupCount);
+          }
+        }
+
+        // A negative filterArg is passed through unshifted.
+        int filterArg = aggCall.filterArg < 0 ? aggCall.filterArg
+            : aggCall.filterArg + groupCount;
+        newAggCalls.add(
+            aggCall.adaptTo(joinOutputProject, argList, filterArg,
+                aggregate.getGroupCount(), groupCount));
+      }
+
+      ImmutableBitSet groupSet =
+          ImmutableBitSet.range(groupCount);
+      LogicalAggregate newAggregate =
+          LogicalAggregate.create(joinOutputProject,
+              false,
+              groupSet,
+              null,
+              newAggCalls);
+
+      List newAggOutputProjectList = Lists.newArrayList();
+      for (int i : groupSet) {
+        newAggOutputProjectList.add(
+            rexBuilder.makeInputRef(newAggregate, i));
+      }
+
+      // The single original output expression is rewritten and cast to the
+      // nullable version of its type.
+      RexNode newAggOutputProjects =
+          removeCorrelationExpr(aggOutputProjects.get(0), false);
+      newAggOutputProjectList.add(
+          rexBuilder.makeCast(
+              cluster.getTypeFactory().createTypeWithNullability(
+                  newAggOutputProjects.getType(),
+                  true),
+              newAggOutputProjects));
+
+      RelNode newAggOutputProject =
+          RelOptUtil.createProject(
+              newAggregate,
+              newAggOutputProjectList,
+              null);
+
+      call.transformTo(newAggOutputProject);
+
+      removeCorVarFromTree(correlate);
+    }
+  }
+
+  // REVIEW jhyde 29-Oct-2007: This rule is non-static, depends on the state
+  // of members in RelDecorrelator, and has side-effects in the decorrelator.
+  // This breaks the contract of a planner rule, and the rule will not be
+  // reusable in other planners.
+
+  // REVIEW jvs 29-Oct-2007:  Shouldn't it also be incorporating
+  // the flavor attribute into the description?
+
+  /** Planner rule that adjusts projects when counts are added.
*/ + private final class AdjustProjectForCountAggregateRule extends RelOptRule { + final boolean flavor; + + public AdjustProjectForCountAggregateRule(boolean flavor) { + super( + flavor + ? operand(LogicalCorrelate.class, + operand(RelNode.class, any()), + operand(LogicalProject.class, + operand(LogicalAggregate.class, any()))) + : operand(LogicalCorrelate.class, + operand(RelNode.class, any()), + operand(LogicalAggregate.class, any()))); + this.flavor = flavor; + } + + public void onMatch(RelOptRuleCall call) { + final LogicalCorrelate correlate = call.rel(0); + final RelNode left = call.rel(1); + final LogicalProject aggOutputProject; + final LogicalAggregate aggregate; + if (flavor) { + aggOutputProject = call.rel(2); + aggregate = call.rel(3); + } else { + aggregate = call.rel(2); + + // Create identity projection + final List> projects = Lists.newArrayList(); + final List fields = + aggregate.getRowType().getFieldList(); + for (int i = 0; i < fields.size(); i++) { + projects.add(RexInputRef.of2(projects.size(), fields)); + } + aggOutputProject = + (LogicalProject) RelOptUtil.createProject( + aggregate, + projects, + false); + } + onMatch2(call, correlate, left, aggOutputProject, aggregate); + } + + private void onMatch2( + RelOptRuleCall call, + LogicalCorrelate correlate, + RelNode leftInput, + LogicalProject aggOutputProject, + LogicalAggregate aggregate) { + if (generatedCorRels.contains(correlate)) { + // This correlator was generated by a previous invocation of + // this rule. No further work to do. + return; + } + + setCurrent(call.getPlanner().getRoot(), correlate); + + // check for this pattern + // The pattern matching could be simplified if rules can be applied + // during decorrelation, + // + // CorrelateRel(left correlation, condition = true) + // LeftInputRel + // LogicalProject-A (a RexNode) + // LogicalAggregate (groupby (0), agg0(), agg1()...) 
+ + // check aggOutputProj projects only one expression + List aggOutputProjExprs = aggOutputProject.getProjects(); + if (aggOutputProjExprs.size() != 1) { + return; + } + + JoinRelType joinType = correlate.getJoinType().toJoinType(); + // corRel.getCondition was here, however Correlate was updated so it + // never includes a join condition. The code was not modified for brevity. + RexNode joinCond = rexBuilder.makeLiteral(true); + if ((joinType != JoinRelType.LEFT) + || (joinCond != rexBuilder.makeLiteral(true))) { + return; + } + + // check that the agg is on the entire input + if (!aggregate.getGroupSet().isEmpty()) { + return; + } + + List aggCalls = aggregate.getAggCallList(); + Set isCount = Sets.newHashSet(); + + // remember the count() positions + int i = -1; + for (AggregateCall aggCall : aggCalls) { + ++i; + if (aggCall.getAggregation() instanceof SqlCountAggFunction) { + isCount.add(i); + } + } + + // now rewrite the plan to + // + // Project-A' (all LHS plus transformed original projections, + // replacing references to count() with case statement) + // Correlator(left correlation, condition = true) + // LeftInputRel + // LogicalAggregate (groupby (0), agg0(), agg1()...) + // + LogicalCorrelate newCorrelate = + LogicalCorrelate.create(leftInput, aggregate, + correlate.getCorrelationId(), correlate.getRequiredColumns(), + correlate.getJoinType()); + + // remember this rel so we don't fire rule on it again + // REVIEW jhyde 29-Oct-2007: rules should not save state; rule + // should recognize patterns where it does or does not need to do + // work + generatedCorRels.add(newCorrelate); + + // need to update the mapCorVarToCorRel Update the output position + // for the cor vars: only pass on the cor vars that are not used in + // the join key. 
+ if (cm.mapCorVarToCorRel.get(correlate.getCorrelationId()) == correlate) { + cm.mapCorVarToCorRel.put(correlate.getCorrelationId(), newCorrelate); + } + + RelNode newOutput = + aggregateCorrelatorOutput(newCorrelate, aggOutputProject, isCount); + + call.transformTo(newOutput); + } + } + + /** + * {@code Correlation} here represents a unique reference to a correlation + * field. + * For instance, if a RelNode references emp.name multiple times, it would + * result in multiple {@code Correlation} objects that differ just in + * {@link Correlation#uniqueKey}. + */ + static class Correlation + implements Comparable { + public final int uniqueKey; + public final CorrelationId corr; + public final int field; + + Correlation(CorrelationId corr, int field, int uniqueKey) { + this.corr = corr; + this.field = field; + this.uniqueKey = uniqueKey; + } + + public int compareTo(Correlation o) { + int c = corr.compareTo(o.corr); + if (c != 0) { + return c; + } + c = Integer.compare(field, o.field); + if (c != 0) { + return c; + } + return Integer.compare(uniqueKey, o.uniqueKey); + } + } + + /** A map of the locations of + * {@link org.apache.calcite.rel.logical.LogicalCorrelate} + * in a tree of {@link RelNode}s. + * + *

It is used to drive the decorrelation process. + * Treat it as immutable; rebuild if you modify the tree. + * + *

There are three maps:

    + * + *
  1. mapRefRelToCorVar maps a rel node to the correlated variables it + * references; + * + *
  2. mapCorVarToCorRel maps a correlated variable to the correlatorRel + * providing it; + * + *
  3. mapFieldAccessToCorVar maps a rex field access to + * the cor var it represents. Because typeFlattener does not clone or + * modify a correlated field access this map does not need to be + * updated. + * + *
*/ + private static class CorelMap { + private final Multimap mapRefRelToCorVar; + private final SortedMap mapCorVarToCorRel; + private final Map mapFieldAccessToCorVar; + + // TODO: create immutable copies of all maps + private CorelMap(Multimap mapRefRelToCorVar, + SortedMap mapCorVarToCorRel, + Map mapFieldAccessToCorVar) { + this.mapRefRelToCorVar = mapRefRelToCorVar; + this.mapCorVarToCorRel = mapCorVarToCorRel; + this.mapFieldAccessToCorVar = ImmutableMap.copyOf(mapFieldAccessToCorVar); + } + + @Override public String toString() { + return "mapRefRelToCorVar=" + mapRefRelToCorVar + + "\nmapCorVarToCorRel=" + mapCorVarToCorRel + + "\nmapFieldAccessToCorVar=" + mapFieldAccessToCorVar + + "\n"; + } + + @Override public boolean equals(Object obj) { + return obj == this + || obj instanceof CorelMap + && mapRefRelToCorVar.equals(((CorelMap) obj).mapRefRelToCorVar) + && mapCorVarToCorRel.equals(((CorelMap) obj).mapCorVarToCorRel) + && mapFieldAccessToCorVar.equals( + ((CorelMap) obj).mapFieldAccessToCorVar); + } + + @Override public int hashCode() { + return com.google.common.base.Objects.hashCode(mapRefRelToCorVar, + mapCorVarToCorRel, + mapFieldAccessToCorVar); + } + + /** Creates a CorelMap with given contents. */ + public static CorelMap of( + SortedSetMultimap mapRefRelToCorVar, + SortedMap mapCorVarToCorRel, + Map mapFieldAccessToCorVar) { + return new CorelMap(mapRefRelToCorVar, mapCorVarToCorRel, + mapFieldAccessToCorVar); + } + + /** + * Returns whether there are any correlating variables in this statement. + * + * @return whether there are any correlating variables + */ + public boolean hasCorrelation() { + return !mapCorVarToCorRel.isEmpty(); + } + } + + /** Builds a {@link org.apache.calcite.sql2rel.RelDecorrelator.CorelMap}. 
*/ + private static class CorelMapBuilder extends HiveRelShuttleImpl { + final SortedMap mapCorVarToCorRel = + new TreeMap<>(); + + final SortedSetMultimap mapRefRelToCorVar = + Multimaps.newSortedSetMultimap( + Maps.>newHashMap(), + new Supplier>() { + public TreeSet get() { + Bug.upgrade("use MultimapBuilder when we're on Guava-16"); + return Sets.newTreeSet(); + } + }); + + final Map mapFieldAccessToCorVar = + new HashMap<>(); + + final Holder offset = Holder.of(0); + int corrIdGenerator = 0; + + final List stack = new ArrayList<>(); + + /** Creates a CorelMap by iterating over a {@link RelNode} tree. */ + CorelMap build(RelNode rel) { + stripHep(rel).accept(this); + return new CorelMap(mapRefRelToCorVar, mapCorVarToCorRel, + mapFieldAccessToCorVar); + } + + @Override public RelNode visit(LogicalJoin join) { + try { + Stacks.push(stack, join); + join.getCondition().accept(rexVisitor(join)); + } finally { + Stacks.pop(stack, join); + } + return visitJoin(join); + } + + public RelNode visit(HiveJoin join) { + try { + Stacks.push(stack, join); + join.getCondition().accept(rexVisitor(join)); + } finally { + Stacks.pop(stack, join); + } + return visitJoin(join); + } + + @Override protected RelNode visitChild(RelNode parent, int i, + RelNode input) { + return super.visitChild(parent, i, stripHep(input)); + } + + @Override public RelNode visit(LogicalCorrelate correlate) { + mapCorVarToCorRel.put(correlate.getCorrelationId(), correlate); + return visitJoin(correlate); + } + + private RelNode visitJoin(BiRel join) { + final int x = offset.get(); + visitChild(join, 0, join.getLeft()); + offset.set(x + join.getLeft().getRowType().getFieldCount()); + visitChild(join, 1, join.getRight()); + offset.set(x); + return join; + } + + public RelNode visit(final HiveProject project) { + try { + Stacks.push(stack, project); + for (RexNode node : project.getProjects()) { + node.accept(rexVisitor(project)); + } + } finally { + Stacks.pop(stack, project); + } + return 
super.visit(project); + } + public RelNode visit(final HiveFilter filter) { + try { + Stacks.push(stack, filter); + filter.getCondition().accept(rexVisitor(filter)); + } finally { + Stacks.pop(stack, filter); + } + return super.visit(filter); + } + @Override public RelNode visit(final LogicalFilter filter) { + try { + Stacks.push(stack, filter); + filter.getCondition().accept(rexVisitor(filter)); + } finally { + Stacks.pop(stack, filter); + } + return super.visit(filter); + } + + @Override public RelNode visit(LogicalProject project) { + try { + Stacks.push(stack, project); + for (RexNode node : project.getProjects()) { + node.accept(rexVisitor(project)); + } + } finally { + Stacks.pop(stack, project); + } + return super.visit(project); + } + + private RexVisitorImpl rexVisitor(final RelNode rel) { + return new RexVisitorImpl(true) { + @Override public Void visitFieldAccess(RexFieldAccess fieldAccess) { + final RexNode ref = fieldAccess.getReferenceExpr(); + if (ref instanceof RexCorrelVariable) { + final RexCorrelVariable var = (RexCorrelVariable) ref; + final Correlation correlation = + new Correlation(var.id, + fieldAccess.getField().getIndex(), + corrIdGenerator++); + mapFieldAccessToCorVar.put(fieldAccess, correlation); + mapRefRelToCorVar.put(rel, correlation); + } + return super.visitFieldAccess(fieldAccess); + } + + @Override public Void visitSubQuery(RexSubQuery subQuery) { + subQuery.rel.accept(CorelMapBuilder.this); + return super.visitSubQuery(subQuery); + } + }; + } + } + + /** Frame describing the relational expression after decorrelation + * and where to find the output fields and correlation variables + * among its output fields. 
*/ + static class Frame { + final RelNode r; + final ImmutableSortedMap corVarOutputPos; + final ImmutableMap oldToNewOutputPos; + + Frame(RelNode r, SortedMap corVarOutputPos, + Map oldToNewOutputPos) { + this.r = Preconditions.checkNotNull(r); + this.corVarOutputPos = ImmutableSortedMap.copyOf(corVarOutputPos); + this.oldToNewOutputPos = ImmutableSortedMap.copyOf(oldToNewOutputPos); + } + } +} + +// End RelDecorrelator.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java new file mode 100644 index 0000000..794b730 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -0,0 +1,318 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.CorrelationId; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.LogicVisitor; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql2rel.RelDecorrelator; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +/** + * NOTE: this rule is replicated from Calcite's SubqueryRemoveRule + * Transform that converts IN, EXISTS and scalar sub-queries into joins. + * + *

Sub-queries are represented by {@link RexSubQuery} expressions. + * + *

A sub-query may or may not be correlated. If a sub-query is correlated, + * the wrapped {@link RelNode} will contain a {@link RexCorrelVariable} before + * the rewrite, and the product of the rewrite will be a {@link Correlate}. + * The Correlate can be removed using {@link RelDecorrelator}. + */ +public abstract class HiveSubQueryRemoveRule extends RelOptRule{ + + public static final HiveSubQueryRemoveRule FILTER = + new HiveSubQueryRemoveRule( + operand(Filter.class, null, RexUtil.SubQueryFinder.FILTER_PREDICATE, + any()), + RelFactories.LOGICAL_BUILDER, "SubQueryRemoveRule:Filter") { + public void onMatch(RelOptRuleCall call) { + final Filter filter = call.rel(0); + final RelBuilder builder = call.builder(); + final RexSubQuery e = + RexUtil.SubQueryFinder.find(filter.getCondition()); + assert e != null; + final RelOptUtil.Logic logic = + LogicVisitor.find(RelOptUtil.Logic.TRUE, + ImmutableList.of(filter.getCondition()), e); + builder.push(filter.getInput()); + final int fieldCount = builder.peek().getRowType().getFieldCount(); + final RexNode target = apply(e, filter.getVariablesSet(), logic, + builder, 1, fieldCount); + final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); + builder.filter(shuttle.apply(filter.getCondition())); + builder.project(fields(builder, filter.getRowType().getFieldCount())); + call.transformTo(builder.build()); + } + }; + + private HiveSubQueryRemoveRule(RelOptRuleOperand operand, + RelBuilderFactory relBuilderFactory, + String description) { + super(operand, relBuilderFactory, description); + } + + protected RexNode apply(RexSubQuery e, Set variablesSet, + RelOptUtil.Logic logic, + RelBuilder builder, int inputCount, int offset) { + switch (e.getKind()) { + case SCALAR_QUERY: + builder.push(e.rel); + final RelMetadataQuery mq = RelMetadataQuery.instance(); + final Boolean unique = mq.areColumnsUnique(builder.peek(), + ImmutableBitSet.of()); + if (unique == null || !unique) { + builder.aggregate(builder.groupKey(), + 
builder.aggregateCall(SqlStdOperatorTable.SINGLE_VALUE, false, null, + null, builder.field(0))); + } + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + return field(builder, inputCount, offset); + + case IN: + case EXISTS: + // Most general case, where the left and right keys might have nulls, and + // caller requires 3-valued logic return. + // + // select e.deptno, e.deptno in (select deptno from emp) + // + // becomes + // + // select e.deptno, + // case + // when ct.c = 0 then false + // when dt.i is not null then true + // when e.deptno is null then null + // when ct.ck < ct.c then null + // else false + // end + // from e + // left join ( + // (select count(*) as c, count(deptno) as ck from emp) as ct + // cross join (select distinct deptno, true as i from emp)) as dt + // on e.deptno = dt.deptno + // + // If keys are not null we can remove "ct" and simplify to + // + // select e.deptno, + // case + // when dt.i is not null then true + // else false + // end + // from e + // left join (select distinct deptno, true as i from emp) as dt + // on e.deptno = dt.deptno + // + // We could further simplify to + // + // select e.deptno, + // dt.i is not null + // from e + // left join (select distinct deptno, true as i from emp) as dt + // on e.deptno = dt.deptno + // + // but have not yet. + // + // If the logic is TRUE we can just kill the record if the condition + // evaluates to FALSE or UNKNOWN. 
Thus the query simplifies to an inner + // join: + // + // select e.deptno, + // true + // from e + // inner join (select distinct deptno from emp) as dt + // on e.deptno = dt.deptno + // + + builder.push(e.rel); + final List fields = new ArrayList<>(); + switch (e.getKind()) { + case IN: + fields.addAll(builder.fields()); + } + + // First, the cross join + switch (logic) { + case TRUE_FALSE_UNKNOWN: + case UNKNOWN_AS_TRUE: + if (!variablesSet.isEmpty()) { + // We have not yet figured out how to include "ct" in a query if + // the source relation "e.rel" is correlated. So, dodge the issue: + // we pretend that the join key is NOT NULL. + // + // We will get wrong results in correlated IN where the join + // key has nulls. E.g. + // + // SELECT * + // FROM emp + // WHERE mgr NOT IN ( + // SELECT mgr + // FROM emp AS e2 + // WHERE + logic = RelOptUtil.Logic.TRUE_FALSE; + break; + } + builder.aggregate(builder.groupKey(), + builder.count(false, "c"), + builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck", + builder.fields())); + builder.as("ct"); + builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); + offset += 2; + builder.push(e.rel); + break; + } + + // Now the left join + switch (logic) { + case TRUE: + if (fields.isEmpty()) { + builder.project(builder.alias(builder.literal(true), "i")); + builder.aggregate(builder.groupKey(0)); + } else { + builder.aggregate(builder.groupKey(fields)); + } + break; + default: + fields.add(builder.alias(builder.literal(true), "i")); + builder.project(fields); + builder.distinct(); + } + builder.as("dt"); + final List conditions = new ArrayList<>(); + for (Pair pair + : Pair.zip(e.getOperands(), builder.fields())) { + conditions.add( + builder.equals(pair.left, RexUtil.shift(pair.right, offset))); + } + switch (logic) { + case TRUE: + builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet); + return builder.literal(true); + } + builder.join(JoinRelType.LEFT, builder.and(conditions), 
variablesSet); + + final List keyIsNulls = new ArrayList<>(); + for (RexNode operand : e.getOperands()) { + if (operand.getType().isNullable()) { + keyIsNulls.add(builder.isNull(operand)); + } + } + final ImmutableList.Builder operands = ImmutableList.builder(); + switch (logic) { + case TRUE_FALSE_UNKNOWN: + case UNKNOWN_AS_TRUE: + operands.add( + builder.equals(builder.field("ct", "c"), builder.literal(0)), + builder.literal(false)); + break; + } + operands.add(builder.isNotNull(builder.field("dt", "i")), + builder.literal(true)); + if (!keyIsNulls.isEmpty()) { + //Calcite creates null literal with Null type here but because HIVE doesn't support null type + // it is appropriately typed boolean + operands.add(builder.or(keyIsNulls), e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN)); + } + Boolean b = true; + switch (logic) { + case TRUE_FALSE_UNKNOWN: + b = null; + // fall through + case UNKNOWN_AS_TRUE: + operands.add( + builder.call(SqlStdOperatorTable.LESS_THAN, + builder.field("ct", "ck"), builder.field("ct", "c")), + builder.literal(b)); + break; + } + operands.add(builder.literal(false)); + return builder.call(SqlStdOperatorTable.CASE, operands.build()); + + default: + throw new AssertionError(e.getKind()); + } + } + + /** Returns a reference to a particular field, by offset, across several + * inputs on a {@link RelBuilder}'s stack. */ + private RexInputRef field(RelBuilder builder, int inputCount, int offset) { + for (int inputOrdinal = 0;;) { + final RelNode r = builder.peek(inputCount, inputOrdinal); + if (offset < r.getRowType().getFieldCount()) { + return builder.field(inputCount, inputOrdinal, offset); + } + ++inputOrdinal; + offset -= r.getRowType().getFieldCount(); + } + } + + /** Returns a list of expressions that project the first {@code fieldCount} + * fields of the top input on a {@link RelBuilder}'s stack. 
*/ + private static List fields(RelBuilder builder, int fieldCount) { + final List projects = new ArrayList<>(); + for (int i = 0; i < fieldCount; i++) { + projects.add(builder.field(i)); + } + return projects; + } + + /** Shuttle that replaces occurrences of a given + * {@link org.apache.calcite.rex.RexSubQuery} with a replacement + * expression. */ + private static class ReplaceSubQueryShuttle extends RexShuttle { + private final RexSubQuery subQuery; + private final RexNode replacement; + + public ReplaceSubQueryShuttle(RexSubQuery subQuery, RexNode replacement) { + this.subQuery = subQuery; + this.replacement = replacement; + } + + @Override public RexNode visitSubQuery(RexSubQuery subQuery) { + return RexUtil.eq(subQuery, this.subQuery) ? replacement : subQuery; + } + } +} + +// End SubQueryRemoveRule.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index cb7bc26..828333e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -32,12 +32,14 @@ import org.apache.calcite.avatica.util.TimeUnitRange; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlKind; @@ -67,6 +69,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import 
org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; @@ -115,6 +118,8 @@ private InputCtx(RelDataType calciteInpDataType, ImmutableMap h private final RelOptCluster cluster; private final ImmutableList inputCtxs; private final boolean flattenExpr; + private final RowResolver outerRR; + private final ImmutableMap outerNameToPosMap; //Constructor used by HiveRexExecutorImpl public RexNodeConverter(RelOptCluster cluster) { @@ -122,16 +127,30 @@ public RexNodeConverter(RelOptCluster cluster) { } public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, + ImmutableMap outerNameToPosMap, + ImmutableMap nameToPosMap, RowResolver hiveRR, RowResolver outerRR, int offset, boolean flattenExpr) { + this.cluster = cluster; + this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, hiveRR , offset)); + this.flattenExpr = flattenExpr; + this.outerRR = outerRR; + this.outerNameToPosMap = outerNameToPosMap; + } + + public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, ImmutableMap nameToPosMap, int offset, boolean flattenExpr) { this.cluster = cluster; this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, null, offset)); this.flattenExpr = flattenExpr; + this.outerRR = null; + this.outerNameToPosMap = null; } public RexNodeConverter(RelOptCluster cluster, List inpCtxLst, boolean flattenExpr) { this.cluster = cluster; this.inputCtxs = ImmutableList. 
builder().addAll(inpCtxLst).build(); this.flattenExpr = flattenExpr; + this.outerRR = null; + this.outerNameToPosMap = null; } public RexNode convert(ExprNodeDesc expr) throws SemanticException { @@ -143,12 +162,35 @@ public RexNode convert(ExprNodeDesc expr) throws SemanticException { return convert((ExprNodeColumnDesc) expr); } else if (expr instanceof ExprNodeFieldDesc) { return convert((ExprNodeFieldDesc) expr); + } else if(expr instanceof ExprNodeSubQueryDesc) { + return convert((ExprNodeSubQueryDesc) expr); } else { throw new RuntimeException("Unsupported Expression"); } // TODO: handle ExprNodeColumnListDesc } + private RexNode convert(final ExprNodeSubQueryDesc subQueryDesc) throws SemanticException { + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.IN) + { + //create RexNode for LHS + RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs()); + + //create RexSubQuery node + RexNode rexSubQuery = RexSubQuery.in(subQueryDesc.getRexSubQuery(), ImmutableList.of(rexNodeLhs) ); + return rexSubQuery; + } + else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.EXISTS) + { + RexNode subQueryNode = RexSubQuery.exists(subQueryDesc.getRexSubQuery()); + return subQueryNode; + } + else { + assert(true); + return null; + } + } + private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException { RexNode rexNode = convert(fieldDesc.getDesc()); if (rexNode instanceof RexCall) { @@ -419,7 +461,7 @@ private static boolean checkForStatefulFunctions(List list) { private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException { InputCtx ctxLookingFor = null; - if (inputCtxs.size() == 1) { + if (inputCtxs.size() == 1 && inputCtxs.get(0).hiveRR == null) { ctxLookingFor = inputCtxs.get(0); } else { String tableAlias = col.getTabAlias(); @@ -442,7 +484,20 @@ private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException { } protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException { + //if this is co-rrelated we 
need to make RexCorrelVariable(with id and type) + // id and type should be retrieved from outerRR InputCtx ic = getInputCtx(col); + if(ic == null) { + //we have co related column + //build data type from outer rr + //make field access passing index + //RelDataType colType = TypeConverter.convert(col.getTypeInfo(), cluster.getRexBuilder().getTypeFactory()); + RelDataType rowType = TypeConverter.getType(cluster, this.outerRR, null); + int pos = this.outerNameToPosMap.get(col.getColumn()); + CorrelationId colCorr = new CorrelationId(0); + RexNode corExpr = cluster.getRexBuilder().makeCorrel(rowType, colCorr); + return cluster.getRexBuilder().makeFieldAccess(corExpr, pos); + } int pos = ic.hiveNameToPosMap.get(col.getColumn()); return cluster.getRexBuilder().makeInputRef( ic.calciteInpDataType.getFieldList().get(pos).getType(), pos + ic.offsetInCalciteSchema); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 2bb6aa1..5f02f5b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -24,9 +24,11 @@ import java.math.BigDecimal; import java.util.AbstractMap.SimpleEntry; import java.util.ArrayList; +import java.util.ArrayDeque; import java.util.Arrays; import java.util.BitSet; import java.util.Collections; +import java.util.Deque ; import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; @@ -90,6 +92,7 @@ import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.schema.SchemaPlus; @@ -134,6 +137,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; import 
org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; @@ -189,6 +193,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSubQueryRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; @@ -999,7 +1004,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // 1. 
Gen Calcite Plan perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); try { - calciteGenPlan = genLogicalPlan(getQB(), true); + calciteGenPlan = genLogicalPlan(getQB(), true, null, null); resultSchema = SemanticAnalyzer.convertRowSchemaToResultSetSchema( relToHiveRR.get(calciteGenPlan), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); @@ -1023,6 +1028,15 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // Create executor Executor executorProvider = new HiveRexExecutorImpl(cluster); + //Remove subquery + LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); + calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null, + HiveSubQueryRemoveRule.FILTER); + LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); + + calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan); + LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan)); + // 2. 
Apply pre-join order optimizations calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, mdProvider.getMetadataProvider(), executorProvider); @@ -1927,8 +1941,9 @@ private TableType obtainTableType(Table tabMetaData) { } private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, + ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean useCaching) throws SemanticException { - ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel), useCaching); + ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel), outerRR, null, useCaching); if (filterCondn instanceof ExprNodeConstantDesc && !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { // queries like select * from t1 where 'foo'; @@ -1944,7 +1959,7 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap .get(srcRel); RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(), - hiveColNameCalcitePosMap, 0, true).convert(filterCondn); + outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), outerRR, 0, true).convert(filterCondn); RexNode factoredFilterExpr = RexUtil .pullFactors(cluster.getRexBuilder(), convertedFilterExpr); RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), @@ -1956,146 +1971,116 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, return filterRel; } - private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, - Map aliasToRel, boolean forHavingClause) throws SemanticException { - /* - * Handle Subquery predicates. - * - * Notes (8/22/14 hb): Why is this a copy of the code from {@link - * #genFilterPlan} - for now we will support the same behavior as non CBO - * route. - but plan to allow nested SubQueries(Restriction.9.m) and - * multiple SubQuery expressions(Restriction.8.m). 
This requires use to - * utilize Calcite's Decorrelation mechanics, and for Calcite to fix/flush - * out Null semantics(CALCITE-373) - besides only the driving code has - * been copied. Most of the code which is SubQueryUtils and QBSubQuery is - * reused. - */ - int numSrcColumns = srcRel.getRowType().getFieldCount(); - List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); - if (subQueriesInOriginalTree.size() > 0) { + private void subqueryRestritionCheck(QB qb, ASTNode searchCond, RelNode srcRel, boolean forHavingClause ) throws SemanticException { + List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); + if (subQueriesInOriginalTree.size() > 0) { /* * Restriction.9.m :: disallow nested SubQuery expressions. */ - if (qb.getSubQueryPredicateDef() != null) { - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported.")); - } + if (qb.getSubQueryPredicateDef() != null) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported.")); + } /* * Restriction.8.m :: We allow only 1 SubQuery expression per Query. */ - if (subQueriesInOriginalTree.size() > 1) { + if (subQueriesInOriginalTree.size() > 1) { - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); - } - - /* - * Clone the Search AST; apply all rewrites on the clone. 
- */ - ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); - List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); - - RowResolver inputRR = relToHiveRR.get(srcRel); - RowResolver outerQBRR = inputRR; - ImmutableMap outerQBPosMap = relToHiveColNameCalcitePosMap.get(srcRel); - - for (int i = 0; i < subQueries.size(); i++) { - ASTNode subQueryAST = subQueries.get(i); - ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); + } + //we do not care about the transformation or rewriting of AST which following statement does + // we only care about the restriction checks they perform. + // We plan to get rid of these restrictions later int sqIdx = qb.incrNumSubQueryPredicates(); + ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(0); + + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); + List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); + ASTNode subQueryAST = subQueries.get(0); clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, - originalSubQueryAST, ctx); + originalSubQueryAST, ctx); + + RowResolver inputRR = relToHiveRR.get(srcRel); - if (!forHavingClause) { - qb.setWhereClauseSubQueryPredicate(subQuery); - } else { - qb.setHavingClauseSubQueryPredicate(subQuery); - } String havingInputAlias = null; + Map aliasToRel = new HashMap<>(); if (forHavingClause) { havingInputAlias = "gby_sq" + sqIdx; aliasToRel.put(havingInputAlias, srcRel); } subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, - aliasToRel.keySet()); - - QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); - qbSQ.setSubQueryDef(subQuery.getSubQuery()); - Phase1Ctx ctx_1 = initPhase1Ctx(); 
- doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1, null); - getMetaData(qbSQ); - RelNode subQueryRelNode = genLogicalPlan(qbSQ, false); - aliasToRel.put(subQuery.getAlias(), subQueryRelNode); - RowResolver sqRR = relToHiveRR.get(subQueryRelNode); - - /* - * Check.5.h :: For In and Not In the SubQuery must implicitly or - * explicitly only contain one select item. - */ - if (subQuery.getOperator().getType() != SubQueryType.EXISTS - && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS - && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) { - throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST, - "SubQuery can contain only 1 item in Select List.")); - } + aliasToRel.keySet()); - /* - * If this is a Not In SubQuery Predicate then Join in the Null Check - * SubQuery. See QBSubQuery.NotInCheck for details on why and how this - * is constructed. - */ - if (subQuery.getNotInCheck() != null) { - QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); - notInCheck.setSQRR(sqRR); - QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true); - qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery()); - ctx_1 = initPhase1Ctx(); - doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1, null); - getMetaData(qbSQ_nic); - RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic, false); - aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); - srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, - // set explicitly to inner until we figure out SemiJoin use - // notInCheck.getJoinType(), - JoinType.INNER, notInCheck.getJoinConditionAST()); - inputRR = relToHiveRR.get(srcRel); - if (forHavingClause) { - aliasToRel.put(havingInputAlias, srcRel); - } - } + // Missing Check: Check.5.h :: For In and Not In the SubQuery must implicitly or + // explicitly only contain one select item. 
+ } + } + private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause, Map subQueryToRelNode) throws SemanticException { - /* - * Gen Join between outer Operator and SQ op - */ - subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); - srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(), - subQuery.getJoinConditionAST()); - searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); - - srcRel = genFilterRelNode(searchCond, srcRel, forHavingClause); - - /* - * For Not Exists and Not In, add a projection on top of the Left - * Outer Join. - */ - if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS - || subQuery.getOperator().getType() != SubQueryType.NOT_IN) { - srcRel = projectLeftOuterSide(srcRel, numSrcColumns); + //disallow subqueries which HIVE doesn't currently support + subqueryRestritionCheck(qb, node, srcRel, forHavingClause); + Deque stack = new ArrayDeque(); + stack.push(node); + + boolean isSubQuery = false; + + while (!stack.isEmpty()) { + ASTNode next = stack.pop(); + + switch(next.getType()) { + case HiveParser.TOK_SUBQUERY_EXPR: + { + String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates(); + QB qbSQ = new QB(qb.getId(), sbQueryAlias, true); + Phase1Ctx ctx_1 = initPhase1Ctx(); + doPhase1((ASTNode)next.getChild(1), qbSQ, ctx_1, null); + getMetaData(qbSQ); + RelNode subQueryRelNode = genLogicalPlan(qbSQ, false, relToHiveColNameCalcitePosMap.get(srcRel), relToHiveRR.get(srcRel)); + subQueryToRelNode.put(next, subQueryRelNode); + isSubQuery = true; + break; + } + default: + int childCount = next.getChildCount(); + for(int i = childCount - 1; i >= 0; i--) { + stack.push((ASTNode) next.getChild(i)); + } } - } - relToHiveRR.put(srcRel, outerQBRR); - relToHiveColNameCalcitePosMap.put(srcRel, outerQBPosMap); - return srcRel; } + return isSubQuery; + } + private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, + Map aliasToRel, ImmutableMap 
outerNameToPosMap, RowResolver outerRR, boolean forHavingClause) throws SemanticException { + + Map subQueryToRelNode = new HashMap<>(); + boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause, subQueryToRelNode); + if(isSubQuery) { + ExprNodeDesc subQueryExpr = genExprNodeDesc(searchCond, relToHiveRR.get(srcRel), outerRR, subQueryToRelNode, forHavingClause); + + ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap + .get(srcRel); + RexNode convertedFilterLHS = new RexNodeConverter(cluster, srcRel.getRowType(), + outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), outerRR, 0, true).convert(subQueryExpr); - return genFilterRelNode(searchCond, srcRel, forHavingClause); + RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), + srcRel, convertedFilterLHS); + + this.relToHiveColNameCalcitePosMap.put(filterRel, this.relToHiveColNameCalcitePosMap + .get(srcRel)); + relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); + return filterRel; + } + else { + return genFilterRelNode(searchCond, srcRel, outerNameToPosMap, outerRR, forHavingClause); + } } private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException { @@ -2122,14 +2107,14 @@ private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws Sema } private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel, - boolean forHavingClause) throws SemanticException { + ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean forHavingClause) throws SemanticException { RelNode filterRel = null; Iterator whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values() .iterator(); if (whereClauseIterator.hasNext()) { filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel, - aliasToRel, forHavingClause); + aliasToRel, outerNameToPosMap, outerRR, forHavingClause); } return filterRel; @@ -3414,7 +3399,7 @@ private RelNode 
genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { switch (qbexpr.getOpcode()) { case NULLOP: - return genLogicalPlan(qbexpr.getQB(), false); + return genLogicalPlan(qbexpr.getQB(), false, null, null); case UNION: case INTERSECT: case INTERSECTALL: @@ -3429,7 +3414,7 @@ private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { } } - private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException { + private RelNode genLogicalPlan(QB qb, boolean outerMostQB, ImmutableMap outerNameToPosMap, RowResolver outerRR ) throws SemanticException { RelNode srcRel = null; RelNode filterRel = null; RelNode gbRel = null; @@ -3502,7 +3487,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept } // 2. Build Rel for where Clause - filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false); + filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, outerNameToPosMap, outerRR, false); srcRel = (filterRel == null) ? 
srcRel : filterRel; RelNode starSrcRel = srcRel; @@ -3603,7 +3588,7 @@ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map subqueryToRelNode, boolean useCaching) + throws SemanticException { + + TypeCheckCtx tcCtx = new TypeCheckCtx(input, useCaching, false); + tcCtx.setOuterRR(outerRR); + tcCtx.setSubqueryToRelNode(subqueryToRelNode); + return genExprNodeDesc(expr, input, tcCtx); + } + + public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, boolean useCaching) throws SemanticException { return genExprNodeDesc(expr, input, useCaching, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java index 02896ff..6b17051 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java @@ -18,9 +18,11 @@ package org.apache.hadoop.hive.ql.parse; +import org.apache.calcite.rel.RelNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import java.util.Map; /** * This class implements the context information that is used for typechecking @@ -35,6 +37,16 @@ */ private RowResolver inputRR; + /** + * RowResolver of outer query. 
This is used to resolve correlated columns in Filter + */ + private RowResolver outerRR; + + /** + * Map from astnode of a subquery to its logical plan + */ + private Map subqueryToRelNode ; + private final boolean useCaching; private final boolean foldExpr; @@ -104,6 +116,8 @@ public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean foldExpr, this.allowWindowing = allowWindowing; this.allowIndexExpr = allowIndexExpr; this.allowSubQueryExpr = allowSubQueryExpr; + this.outerRR = null; + this.subqueryToRelNode = null; } /** @@ -122,6 +136,36 @@ public RowResolver getInputRR() { } /** + * @param outerRR + * the outerRR to set + */ + public void setOuterRR(RowResolver outerRR) { + this.outerRR = outerRR; + } + + /** + * @return the outerRR + */ + public RowResolver getOuterRR() { + return outerRR; + } + + /** + * @param subqueryToRelNode + * the subqueryToRelNode to set + */ + public void setSubqueryToRelNode(Map subqueryToRelNode) { + this.subqueryToRelNode = subqueryToRelNode; + } + + /** + * @return the subqueryToRelNode + */ + public Map getSubqueryToRelNode() { + return subqueryToRelNode; + } + + /** * @param unparseTranslator * the unparseTranslator to set */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 96dab3b..6027306 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -30,6 +30,7 @@ import java.util.Map; import java.util.Stack; +import org.apache.calcite.rel.RelNode; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.hadoop.hive.common.type.HiveChar; @@ -43,7 +44,6 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; import org.apache.hadoop.hive.ql.lib.GraphWalker; @@ -52,6 +52,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.SubQueryWalker; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; @@ -60,6 +61,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc; import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; @@ -134,7 +136,9 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) ASTNode expr = (ASTNode) nd; TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - if (!ctx.isUseCaching()) { + // bypass only if outerRR is null. Otherwise we need to look for expressions in outerRR for + // subqueries e.g. select min(b.value) from table b group by b.key having key in (select .. where a = min(b.value)) + if (!ctx.isUseCaching() && ctx.getOuterRR() == null) { return null; } @@ -147,6 +151,13 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) // If the current subExpression is pre-calculated, as in Group-By etc. 
ColumnInfo colInfo = input.getExpression(expr); + + // try outer row resolver + RowResolver outerRR = ctx.getOuterRR(); + if( colInfo == null && outerRR != null) + { + colInfo = outerRR.getExpression(expr); + } if (colInfo != null) { desc = new ExprNodeColumnDesc(colInfo); ASTNode source = input.getExpressionSource(expr); @@ -193,14 +204,14 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) + HiveParser.TOK_TIMESTAMPLITERAL + "%"), tf.getDateTimeExprProcessor()); opRules.put(new RuleRegExp("R7", HiveParser.TOK_TABLE_OR_COL + "%"), tf.getColumnExprProcessor()); - opRules.put(new RuleRegExp("R8", HiveParser.TOK_SUBQUERY_OP + "%"), + opRules.put(new RuleRegExp("R8", HiveParser.TOK_SUBQUERY_EXPR + "%"), tf.getSubQueryExprProcessor()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), opRules, tcCtx); - GraphWalker ogw = new DefaultGraphWalker(disp); + GraphWalker ogw = new SubQueryWalker(disp); // Create a list of top nodes ArrayList topNodes = Lists.newArrayList(expr); @@ -537,6 +548,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, boolean isTableAlias = input.hasTableAlias(tableOrCol); ColumnInfo colInfo = input.get(null, tableOrCol); + // try outer row resolver + if(ctx.getOuterRR() != null && colInfo == null && !isTableAlias) + { + RowResolver outerRR = ctx.getOuterRR(); + isTableAlias = outerRR.hasTableAlias(tableOrCol); + colInfo = outerRR.get(null, tableOrCol); + } + if (isTableAlias) { if (colInfo != null) { if (parent != null && parent.getType() == HiveParser.DOT) { @@ -1077,6 +1096,13 @@ protected ExprNodeDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, } ColumnInfo colInfo = input.get(tableAlias, colName); + // Try outer Row resolver + if(colInfo == null && ctx.getOuterRR() != null) + { + RowResolver outerRR = ctx.getOuterRR(); + colInfo = outerRR.get(tableAlias, 
colName); + } + if (colInfo == null) { ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); return null; @@ -1141,6 +1167,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } + if(expr.getType() == HiveParser.TOK_SUBQUERY_OP || expr.getType() == HiveParser.TOK_QUERY) { + return null; + } + if (expr.getType() == HiveParser.TOK_TABNAME) { return null; } @@ -1298,11 +1328,35 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return desc; } + boolean isIN = (expr.getChild(0).getChild(0).getType() == HiveParser.KW_IN) ; + boolean isEXISTS = (expr.getChild(0).getChild(0).getType() == HiveParser.KW_EXISTS) ; + + // subqueryToRelNode might be null if subquery expression anywhere other than + // as expected in filter (where/having). We should throw an appropriate error + // message + + Map subqueryToRelNode = ctx.getSubqueryToRelNode(); + if(subqueryToRelNode == null) + { + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + "Subquery could only be used in WHERE and HAVING clause")); + } + + //For now because subquery is only supported in filter we will create subquery expression of boolean type + if(isEXISTS) { + return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryToRelNode.get(expr), ExprNodeSubQueryDesc.EXISTS); + } + if(isIN) { + assert(nodeOutputs[2] != null); + ExprNodeDesc lhs = (ExprNodeDesc)nodeOutputs[2]; + return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryToRelNode.get(expr), ExprNodeSubQueryDesc.IN, lhs); + } + /* * Restriction.1.h :: SubQueries only supported in the SQL Where Clause. 
*/ ctx.setError(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(sqNode, - "Currently SubQuery expressions are only allowed as Where Clause predicates"), + "Currently only IN & EXISTS SubQuery expressions are allowed"), sqNode); return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java new file mode 100755 index 0000000..76d4604 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.calcite.rel.RelNode; + +/** + * A subquery expression. 
+ */ +public class ExprNodeSubQueryDesc extends ExprNodeDesc implements Serializable { + private static final long serialVersionUID = 1L; + + public static final int IN=1; + public static final int EXISTS=2; + + /** + * RelNode corresponding to subquery + */ + private RelNode rexSubQuery; + private ExprNodeDesc subQueryLhs; + private int type; + + public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, int type) { + super(typeInfo); + this.rexSubQuery = subQuery; + this.subQueryLhs = null; + this.type = type; + } + public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, int type, ExprNodeDesc lhs) { + super(typeInfo); + this.rexSubQuery = subQuery; + this.subQueryLhs = lhs; + this.type = type; + + } + + public int getType() { + return type; + } + + public ExprNodeDesc getSubQueryLhs() { + return subQueryLhs; + } + + public RelNode getRexSubQuery() { + return rexSubQuery; + } + + @Override + public ExprNodeDesc clone() { + return new ExprNodeSubQueryDesc(typeInfo, rexSubQuery, type, subQueryLhs); + } + + @Override + public boolean isSame(Object o) { + if (!(o instanceof ExprNodeSubQueryDesc)) { + return false; + } + ExprNodeSubQueryDesc dest = (ExprNodeSubQueryDesc) o; + if (subQueryLhs != null && dest.getSubQueryLhs() != null) { + if (!subQueryLhs.equals(dest.getSubQueryLhs())) { + return false; + } + } + if (!typeInfo.equals(dest.getTypeInfo())) { + return false; + } + if (!rexSubQuery.equals(dest.getRexSubQuery())) { + return false; + } + if(type != dest.getType()) { + return false; + } + return true; + } +} diff --git a/ql/src/test/queries/clientnegative/subquery_restrictions.q b/ql/src/test/queries/clientnegative/subquery_restrictions.q new file mode 100644 index 0000000..14d2a2d --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_restrictions.q @@ -0,0 +1,80 @@ +--Restriction.1.h SubQueries only supported in the SQL Where Clause. 
+select src.key in (select key from src s1 where s1.key > '9') +from src; + +select count(*) +from src +group by src.key in (select key from src s1 where s1.key > '9') ; + +--Restriction.2.h The subquery can only be the RHS of an expression +----curently paser doesn't allow such queries +--select * from part where (select p_size from part) IN (1,2); + +--Restriction.3.m The predicate operators supported are In, Not In, exists and Not exists. +----select * fro part where p_brand > (select key from src) + +--Check.4.h For Exists and Not Exists, the Sub Query must have 1 or more correlated predicates. +---select * from src where exists (select * from part); + +--Check.5.h multiple columns in subquery select +select * from src where src.key in (select * from src s1 where s1.key > '9'); + +--Restriction.6.m The LHS in a SubQuery must have all its Column References be qualified +--This is not restriction anymore + +--Restriction 7.h subquery with or condition +select count(*) +from src +where src.key in (select key from src s1 where s1.key > '9') or src.value is not null +; + +--Restriction.8.m We allow only 1 SubQuery expression per Query +select * from part where p_size IN (select p_size from part) AND p_brand IN (select p_brand from part); + +--Restriction 9.m nested subquery +select * +from part x +where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name)) +; + +--Restriction.10.h In a SubQuery references to Parent Query columns is only supported in the where clause. 
+select * from part where p_size in (select p.p_size + part.p_size from part p); +select * from part where part.p_size IN (select min(p_size) from part p group by part.p_type); + + +--Restriction.11.m A SubQuery predicate that refers to a Parent Query column must be a valid Join predicate +select * from part where p_size in (select p_size from part p where p.p_type > part.p_type); +select * from part where part.p_size IN (select min(p_size) from part p where NOT(part.p_type = p.p_type)); + + +--Check.12.h SubQuery predicates cannot only refer to Parent Query columns +select * from part where p_name IN (select p_name from part p where part.p_type <> 1); + +--Restriction.13.m In the case of an implied Group By on a correlated Sub- Query, the SubQuery always returns 1 row. For e.g. a count on an empty set is 0, while all other UDAFs return null. Converting such a SubQuery into a Join by computing all Groups in one shot, changes the semantics: the Group By SubQuery output will not contain rows for Groups that don’t exist. +select * +from src b +where exists + (select count(*) + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +; + +--Restriction.14.h Correlated Sub Queries cannot contain Windowing clauses. +select p_mfgr, p_name, p_size +from part a +where a.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) + from part b + where a.p_brand = b.p_brand) +; + +--Restriction 15.h all unqualified column references in a SubQuery will resolve to table sources within the SubQuery. 
+select * +from src +where src.key in (select key from src where key > '9') +; + +-- correlated var which refers to outer query join table (no restriction just doesn't work with HIVE) +select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_partkey = p.l_partkey) ; + diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q index c01ae70..023400f 100644 --- a/ql/src/test/queries/clientpositive/subquery_in.q +++ b/ql/src/test/queries/clientpositive/subquery_in.q @@ -118,3 +118,26 @@ from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li o where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) ; + +--lhs contains non-simple expression +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type); +select * from part where (p_size-1) IN (select min(p_size) from part group by p_type); + +-- lhs contains udf expression +explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type); +select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type); + +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); +select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); + +-- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ); +select * from part where p_name IN (select 
p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ); + +-- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand); +select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand); + +-- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size); +select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size); diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q index 3f4fb7f..7fb279b 100644 --- a/ql/src/test/queries/clientpositive/subquery_notin.q +++ b/ql/src/test/queries/clientpositive/subquery_notin.q @@ -76,10 +76,10 @@ order by p_mfgr, p_size ; -- non agg, non corr, Group By in Parent Query -select li.l_partkey, count(*) -from lineitem li -where li.l_linenumber = 1 and - li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') group by li.l_partkey ; diff --git a/ql/src/test/results/clientnegative/subquery_restrictions.q.out b/ql/src/test/results/clientnegative/subquery_restrictions.q.out new file mode 100644 index 0000000..fe27d00 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_restrictions.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Subquery could only be used in WHERE and HAVING clause diff --git 
a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index 1a006d8..b132cb6 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -33,7 +33,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -41,8 +44,24 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (value > 'val_9') (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -55,28 +74,21 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 6 Map Operator Tree: TableScan - alias: a + alias: b Statistics: 
Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -84,19 +96,66 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: 
string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -244,7 +303,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -252,19 +314,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -272,23 +331,32 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -296,7 +364,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) @@ -309,6 +377,53 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 
1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 321f1cc..ae56636 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -23,7 +23,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 
(SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -31,19 +32,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -54,20 +52,16 @@ STAGE PLANS: Filter Operator predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) + Group By Operator + keys: key (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 
(type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -75,19 +69,32 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -147,7 +154,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), 
Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -155,18 +165,15 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 @@ -175,22 +182,35 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - 
keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -198,19 +218,66 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: 
COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: 
COMPLETE Stage: Stage-0 Fetch Operator @@ -278,9 +345,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -288,19 +356,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: UDFToDouble(p_size) is not null (type: boolean) + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_size (type: int), UDFToDouble(p_size) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3458 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: double) - sort order: + - Map-reduce partition columns: _col2 (type: double) - Statistics: Num rows: 26 Data size: 3458 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: int) + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -321,9 +386,9 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: double) + 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 125 Basic 
stats: COMPLETE Column stats: COMPLETE @@ -386,19 +451,29 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -455,9 +530,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -465,19 +543,16 @@ STAGE PLANS: TableScan 
alias: b Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (p_mfgr is not null and p_size is not null) (type: boolean) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -485,15 +560,29 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 
Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -501,10 +590,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -552,41 +641,89 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 
(type: int) Reducer 5 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key 
expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -647,8 +784,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -656,38 +795,52 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column 
stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 6 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 
14774 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + alias: b + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -695,7 +848,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -711,21 +864,55 @@ STAGE PLANS: Reducer 4 Execution mode: llap Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 
(type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -809,45 +996,66 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- 
Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan alias: li Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: int) + value expressions: _col0 (type: int), _col2 (type: int) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 6 Map Operator Tree: 
TableScan alias: lineitem Statistics: Num rows: 100 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: l_orderkey + Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int) + keys: l_orderkey (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE @@ -858,44 +1066,37 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Reducer 3 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col3 (type: int) + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -904,32 +1105,32 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col3 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col4 (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -979,3 +1180,850 @@ POSTHOOK: Input: default@lineitem #### A masked pattern was here #### 108570 8571 4297 1798 +PREHOOK: query: --lhs contains non-simple expression +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: --lhs contains non-simple expression +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice 
(type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 4952 Basic 
stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here 
#### +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +PREHOOK: query: -- lhs contains udf expression +explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: -- lhs contains udf expression +explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + 
TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: floor(_col7) (type: bigint) + sort order: + + Map-reduce partition columns: floor(_col7) (type: bigint) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_retailprice (type: double) + outputColumnNames: p_type, p_retailprice + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_retailprice) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + 
condition map: + Inner Join 0 to 1 + keys: + 0 floor(_col7) (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: floor(_col1) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: -- BUG NOT WORKING correlated query, multiple correlated variables referring to same outer var +--explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); +--select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); + +-- correlated query, 
multiple correlated variables referring to different outer var +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +PREHOOK: type: QUERY +POSTHOOK: query: -- BUG NOT WORKING correlated query, multiple correlated variables referring to same outer var +--explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); +--select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); + +-- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 
(type: int), _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_partkey (type: int), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string), _col5 (type: int) + 1 _col1 (type: int), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col1, _col3, _col4 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: int), _col1 (type: string), _col4 (type: int) + outputColumnNames: _col3, _col1, _col4 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col3 (type: int), _col1 (type: string), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data 
size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 
Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick 
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: -- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string), p_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: 
string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_brand (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: COMPLETE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 
+ Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique blue firebrick mint +almond antique burnished rose metallic +almond antique burnished rose metallic +almond antique chartreuse khaki white +almond antique chartreuse lavender yellow +almond antique forest lavender goldenrod +almond antique gainsboro frosted violet +almond antique medium spring khaki +almond antique metallic orange dim +almond antique misty red olive +almond antique olive coral navajo +almond antique salmon chartreuse burlywood +almond antique sky peru orange +almond antique violet chocolate turquoise +almond antique violet mint lemon +almond antique violet turquoise frosted +almond aquamarine burnished black steel +almond aquamarine dodger light gainsboro +almond aquamarine floral ivory bisque +almond aquamarine midnight light salmon +almond aquamarine pink moccasin thistle +almond aquamarine rose maroon antique +almond aquamarine sandy cyan gainsboro +almond aquamarine yellow dodger mint +almond azure aquamarine papaya violet +almond azure blanched chiffon midnight +PREHOOK: query: -- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated var refers to outer table alias which is an expression +explain select p_name from 
(select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string), (p_size + 1) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col1 + 1) (type: int) + sort order: + + Map-reduce partition columns: (_col1 + 1) (type: int) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + 
alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (p_size + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: 
_col0 (type: string), _col1 (type: int) + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique blue firebrick mint +almond antique burnished rose metallic +almond antique burnished rose metallic +almond antique chartreuse khaki white +almond antique chartreuse lavender yellow +almond antique forest lavender goldenrod +almond antique gainsboro 
frosted violet +almond antique medium spring khaki +almond antique metallic orange dim +almond antique misty red olive +almond antique olive coral navajo +almond antique salmon chartreuse burlywood +almond antique sky peru orange +almond antique violet chocolate turquoise +almond antique violet mint lemon +almond antique violet turquoise frosted +almond aquamarine burnished black steel +almond aquamarine dodger light gainsboro +almond aquamarine floral ivory bisque +almond aquamarine midnight light salmon +almond aquamarine pink moccasin thistle +almond aquamarine rose maroon antique +almond aquamarine sandy cyan gainsboro +almond aquamarine yellow dodger mint +almond azure aquamarine papaya violet +almond azure blanched chiffon midnight diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 3da1acb..f65437b 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- non agg, non corr explain select * @@ -27,8 +27,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -50,21 +51,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - 
predicate: false (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(key) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Map 6 @@ -79,11 +78,16 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE Execution 
mode: llap LLAP IO: no inputs Reducer 2 @@ -95,14 +99,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -112,18 +116,18 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 500 Data size: 132500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 178 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -132,18 +136,28 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -151,7 +165,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product 
+Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from src where src.key not in ( select key from src s1 where s1.key > '2') @@ -285,7 +299,6 @@ POSTHOOK: Input: default@src 199 val_199 199 val_199 2 val_2 -Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- non agg, corr explain select p_mfgr, b.p_name, p_size @@ -316,8 +329,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -332,12 +345,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + value expressions: _col2 (type: int) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: part @@ -354,15 +369,18 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: part - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory 
Usage: 0.1 - value expressions: p_name (type: string) + alias: b + Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -372,42 +390,29 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 26 Data size: 8944 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col4 is null (type: boolean) - Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 1 Data size: 227 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col5 (type: 
boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (not CASE WHEN (_col3 is not null) THEN (true) WHEN (_col0 is null) THEN (null) ELSE (false) END) (type: boolean) Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -436,75 +441,70 @@ STAGE PLANS: isPivotResult: true Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) - Statistics: Num rows: 1 Data size: 491 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (rank_window_0 <= 2) (type: boolean) + Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 491 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: boolean) Reducer 8 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator 
- key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -512,7 +512,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select p_mfgr, b.p_name, p_size from part b where b.p_name not in @@ -535,23 +534,7 @@ order by p_mfgr, b.p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#1 almond antique chartreuse lavender yellow 34 -Manufacturer#1 almond antique salmon chartreuse burlywood 6 -Manufacturer#1 almond aquamarine burnished black steel 28 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 -Manufacturer#2 almond antique violet turquoise frosted 40 -Manufacturer#2 almond aquamarine rose maroon antique 25 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 -Manufacturer#3 almond antique chartreuse khaki white 17 -Manufacturer#3 almond antique metallic orange dim 19 -Manufacturer#3 almond antique olive coral navajo 45 -Manufacturer#4 almond antique violet mint lemon 39 -Manufacturer#4 almond aquamarine floral ivory bisque 27 -Manufacturer#4 almond azure aquamarine papaya violet 12 -Manufacturer#5 almond antique blue firebrick mint 31 -Manufacturer#5 almond aquamarine dodger light gainsboro 46 -Manufacturer#5 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join 
MERGEJOIN[48][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- agg, non corr explain select p_name, p_size @@ -581,8 +564,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) @@ -630,6 +614,20 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: llap LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -639,14 +637,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3666 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col1) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col1) (type: double) - Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 26 Data 
size: 3666 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -656,18 +654,18 @@ STAGE PLANS: keys: 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 26 Data size: 3458 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 26 Data size: 3770 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 13 Data size: 1885 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -724,24 +722,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -794,11 +783,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: double), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -806,7 +800,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join 
MERGEJOIN[50][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[51][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select p_name, p_size from part where part.p_size not in @@ -853,7 +847,6 @@ almond aquamarine sandy cyan gainsboro 18 almond aquamarine yellow dodger mint 7 almond azure aquamarine papaya violet 12 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- agg, corr explain select p_mfgr, p_name, p_size @@ -881,10 +874,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) @@ -900,12 +892,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: part @@ -921,34 +915,20 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: part - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), 
p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + alias: b + Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 10 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -956,42 +936,29 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: 
COMPLETE - value expressions: _col0 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: 0 _col1 (type: string), _col2 (type: int) 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col4 is null (type: boolean) - Statistics: Num rows: 1 Data size: 227 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 1 Data size: 227 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col5 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (not CASE WHEN (_col3 is not null) THEN (true) WHEN (_col2 is null) THEN (null) ELSE (false) END) (type: boolean) Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1025,19 +992,40 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + 
sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1047,83 +1035,47 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 is null or _col0 is null) (type: boolean) + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: boolean) Reducer 9 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col2, _col5 - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic 
stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1131,7 +1083,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select p_mfgr, p_name, p_size from part b where b.p_size not in (select min(p_size) @@ -1152,41 +1103,21 @@ order by p_mfgr, p_size POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#1 almond antique salmon chartreuse burlywood 6 -Manufacturer#1 almond aquamarine burnished black steel 28 -Manufacturer#1 almond antique chartreuse lavender yellow 34 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 -Manufacturer#2 almond antique violet chocolate turquoise 14 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 -Manufacturer#2 almond aquamarine rose maroon antique 25 -Manufacturer#2 almond antique violet turquoise frosted 40 -Manufacturer#3 almond antique forest lavender goldenrod 14 -Manufacturer#3 almond antique chartreuse khaki white 17 -Manufacturer#3 almond antique metallic orange dim 19 -Manufacturer#3 almond antique olive coral navajo 45 -Manufacturer#4 almond antique gainsboro frosted violet 10 -Manufacturer#4 almond azure aquamarine papaya violet 12 -Manufacturer#4 almond aquamarine floral ivory 
bisque 27 -Manufacturer#4 almond antique violet mint lemon 39 -Manufacturer#5 almond antique medium spring khaki 6 -Manufacturer#5 almond azure blanched chiffon midnight 23 -Manufacturer#5 almond antique blue firebrick mint 31 -Manufacturer#5 almond aquamarine dodger light gainsboro 46 -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- non agg, non corr, Group By in Parent Query -select li.l_partkey, count(*) -from lineitem li -where li.l_linenumber = 1 and - li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') group by li.l_partkey PREHOOK: type: QUERY PREHOOK: Input: default@lineitem #### A masked pattern was here #### POSTHOOK: query: -- non agg, non corr, Group By in Parent Query -select li.l_partkey, count(*) -from lineitem li -where li.l_linenumber = 1 and - li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') group by li.l_partkey POSTHOOK: type: QUERY POSTHOOK: Input: default@lineitem @@ -1207,7 +1138,7 @@ POSTHOOK: Input: default@lineitem 139636 1 175839 1 182052 1 -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- alternate not in syntax select * from src @@ -1371,7 +1302,7 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_v POSTHOOK: Output: database:default POSTHOOK: Output: default@T2_v -Warning: Shuffle Join 
MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) @@ -1390,8 +1321,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1422,19 +1354,21 @@ STAGE PLANS: insideView TRUE Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (true) ELSE (key is null) END) (type: boolean) - Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < '11') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count() + aggregations: count(), count(_col0) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Map 6 @@ -1445,17 +1379,22 @@ STAGE PLANS: insideView TRUE Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (null) ELSE ((key < '11')) END) (type: boolean) - Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < '11') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 15272 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 15272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 31208 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1467,13 +1406,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 17098 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 17098 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1483,18 +1423,18 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 199 Data size: 53929 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col2 is null (type: boolean) - Statistics: Num rows: 1 Data size: 271 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 83 Data size: 8881 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1503,18 +1443,28 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - 
outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -1522,7 +1472,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out index b58fcbe..c28a218 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out +++ 
b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out @@ -32,7 +32,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -40,8 +43,22 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (value > 'val_9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -52,45 +69,86 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 6 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic 
stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -237,7 +295,10 @@ 
STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -245,46 +306,54 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 
500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) @@ -297,6 +366,50 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 
(type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index 21a48ec..3b4aa8d 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -22,7 +22,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -30,19 +31,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan @@ -51,37 +49,45 @@ STAGE PLANS: Filter Operator predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) + Group By Operator + keys: key (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -140,7 +146,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -148,57 +157,113 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 
(type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + 
outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data 
size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -265,9 +330,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Reducer 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -275,19 +341,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(p_size) is not null (type: boolean) + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_size (type: int), UDFToDouble(p_size) (type: double) - outputColumnNames: _col0, _col1, _col2 
+ Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: double) - sort order: + - Map-reduce partition columns: _col2 (type: double) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: int) + value expressions: _col0 (type: string), _col1 (type: int) Map 3 Map Operator Tree: TableScan @@ -303,9 +366,9 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: double) + 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE @@ -366,19 +429,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -434,9 +506,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 9 <- Map 8 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -444,41 +519,54 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_mfgr is not null and p_size is not null) (type: boolean) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: 
_col0 (type: string) + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) + TopN Hash Memory Usage: 0.1 + Map 8 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -526,39 +614,84 @@ STAGE PLANS: expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Group By 
Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col1 (type: int) Reducer 5 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 6 + Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) 
- mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Reducer 9 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data 
size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -618,8 +751,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -627,74 +762,122 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 
166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + value expressions: _col0 (type: string) + Map 6 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: key (type: string), value (type: string) + keys: value (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: string) + keys: _col0 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - - Stage: Stage-0 - Fetch Operator - limit: -1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string) + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: ListSink @@ -773,43 +956,62 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter 
Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) - Map 4 + value expressions: _col0 (type: int), _col2 (type: int) + Map 6 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: _col0 + outputColumnNames: l_orderkey Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: int) + keys: l_orderkey (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE @@ -818,41 +1020,35 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator 
- predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) - Reducer 3 + value expressions: _col0 (type: int), _col3 (type: int) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -860,10 +1056,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col0, _col3 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col2 (type: int) + expressions: _col0 (type: int), 
_col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -873,18 +1069,18 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -934,3 +1130,809 @@ POSTHOOK: Input: default@lineitem #### A masked pattern was here #### 108570 8571 4297 1798 +PREHOOK: query: --lhs contains non-simple expression +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: --lhs contains non-simple expression +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), 
p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 
Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +112398 almond antique metallic orange dim 
Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +PREHOOK: query: -- lhs contains udf expression +explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: -- lhs contains udf expression +explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: floor(_col7) (type: bigint) + sort order: + + Map-reduce partition columns: floor(_col7) (type: bigint) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_retailprice (type: double) + outputColumnNames: p_type, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 floor(_col7) (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data 
size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: floor(_col1) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where 
floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: -- BUG NOT WORKING correlated query, multiple correlated variables referring to same outer var +--explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); +--select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); + +-- correlated query, 
multiple correlated variables referring to different outer var +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +PREHOOK: type: QUERY +POSTHOOK: query: -- BUG NOT WORKING correlated query, multiple correlated variables referring to same outer var +--explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); +--select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); + +-- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: 
int), _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_size (type: int) + outputColumnNames: p_partkey, p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_partkey (type: int), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string), _col5 (type: int) + 1 _col1 (type: int), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col1, _col3, _col4 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col1 (type: string), _col4 (type: int) + outputColumnNames: _col3, _col1, _col4 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col3 (type: int), _col1 (type: string), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine 
rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 
42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: -- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string), p_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: 
string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_brand (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_brand (type: string) + outputColumnNames: p_brand + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN 
(select p_type from part where part.p_brand = fpart.brand) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique blue firebrick mint +almond antique burnished rose metallic +almond antique burnished rose metallic +almond antique chartreuse khaki white +almond antique chartreuse lavender yellow +almond antique forest lavender goldenrod +almond antique gainsboro frosted violet +almond antique medium spring khaki +almond antique metallic orange dim +almond antique misty red olive +almond antique olive coral navajo +almond antique salmon chartreuse burlywood +almond antique sky peru orange +almond antique violet chocolate turquoise +almond antique violet mint lemon +almond antique violet turquoise frosted +almond aquamarine burnished black steel +almond aquamarine dodger light gainsboro +almond aquamarine floral ivory bisque +almond aquamarine midnight light salmon +almond aquamarine pink moccasin thistle +almond aquamarine rose maroon antique +almond aquamarine sandy cyan gainsboro +almond aquamarine yellow dodger mint +almond azure aquamarine papaya violet +almond azure blanched chiffon midnight +PREHOOK: query: -- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +POSTHOOK: type: QUERY +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string), (p_size + 1) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col1 + 1) (type: int) + sort order: + + Map-reduce partition columns: (_col1 + 1) (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (p_size + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator 
+ key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique blue firebrick mint +almond antique burnished rose metallic +almond antique burnished rose metallic +almond antique chartreuse khaki white +almond antique chartreuse lavender yellow +almond antique forest lavender goldenrod +almond antique gainsboro frosted violet +almond antique medium spring khaki +almond antique metallic orange dim +almond antique misty red olive +almond antique olive coral navajo +almond antique salmon chartreuse burlywood +almond antique sky peru orange +almond antique violet chocolate turquoise +almond antique violet mint lemon +almond antique violet turquoise frosted +almond aquamarine burnished black steel +almond aquamarine dodger light gainsboro +almond aquamarine floral 
ivory bisque +almond aquamarine midnight light salmon +almond aquamarine pink moccasin thistle +almond aquamarine rose maroon antique +almond aquamarine sandy cyan gainsboro +almond aquamarine yellow dodger mint +almond azure aquamarine papaya violet +almond azure blanched chiffon midnight diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out index 86f9089..1019e7a 100644 --- a/ql/src/test/results/clientpositive/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/subquery_exists.q.out @@ -25,60 +25,146 @@ where exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (value > 'val_9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -222,53 +308,136 @@ where exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE 
- Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 
_col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + 
Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) diff --git a/ql/src/test/results/clientpositive/subquery_exists_having.q.out b/ql/src/test/results/clientpositive/subquery_exists_having.q.out index 8861c82..e54e18f 100644 --- a/ql/src/test/results/clientpositive/subquery_exists_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_exists_having.q.out @@ -22,7 +22,10 @@ having exists POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -32,8 +35,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -72,41 +76,132 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value 
expressions: _col1 (type: bigint) TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (value > 'val_9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 182 Data 
size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -175,8 +270,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -194,36 +290,48 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (value > 'val_9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key 
(type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1166 Data size: 12387 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: 
NONE Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -236,23 +344,84 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 291 Data size: 3091 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 1457 Data size: 15478 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col2 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Mux Operator + Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator - Statistics: Num rows: 999 Data 
size: 10612 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1457 Data size: 15478 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: _col2 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Mux Operator + Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out index 854aa36..e277c59 100644 --- a/ql/src/test/results/clientpositive/subquery_in_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -60,8 +60,9 @@ having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.k POSTHOOK: 
type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -94,15 +95,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -119,11 +117,11 @@ STAGE PLANS: key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -169,24 +167,39 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - 
expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -269,8 +282,11 @@ having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-6 is a root stage + Stage-3 depends on stages: Stage-6 + Stage-4 depends on stages: Stage-3 + 
Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -280,25 +296,22 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + Group By Operator + aggregations: count() + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -306,19 +319,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) + outputColumnNames: 
_col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -332,17 +342,17 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) + key expressions: _col1 (type: string), _col0 (type: bigint) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: bigint) - 1 _col0 (type: string), _col1 (type: bigint) + 1 _col1 (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -353,6 +363,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + 
Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -360,53 +412,108 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE 
Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -451,8 +558,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) @@ -495,11 +603,11 @@ 
STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -519,8 +627,9 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(p_size), min(p_size) @@ -550,9 +659,9 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) - mode: hash + mode: complete outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -601,8 +710,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) @@ -619,8 +729,9 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: 
string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(p_size), min(p_size) @@ -644,10 +755,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 40 Data size: 4230 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 3701 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -673,23 +784,28 @@ STAGE PLANS: expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 40 Data size: 4230 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Group By Operator + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 35 Data size: 3701 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: 
NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -765,60 +881,84 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '8') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > '8') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) + Group By Operator + keys: key (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: 
_col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Demux Operator + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -828,7 +968,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -836,16 +976,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -855,7 +992,7 @@ STAGE PLANS: key expressions: _col2 (type: bigint) sort order: + Map-reduce partition columns: _col2 (type: bigint) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator @@ -864,21 +1001,47 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Demux Operator + Statistics: Num rows: 84 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) 
+ 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 42 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce @@ -912,24 +1075,17 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -966,60 +1122,91 @@ group by key, value having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-5 Stage-5 is a root stage - Stage-7 depends on stages: Stage-2, Stage-5 , consists of Stage-9, Stage-3 - Stage-9 has a backup stage: Stage-3 - Stage-6 depends on stages: Stage-9 - Stage-3 - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-6, Stage-3 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: s1 + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE 
Filter Operator - predicate: (key > '9') (type: boolean) + predicate: (key > '8') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count() keys: key (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Demux Operator + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic 
stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - keys: _col0 (type: bigint) + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -1027,44 +1214,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 - Conditional Operator - - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - - Stage: Stage-6 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: 
NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1074,7 +1246,7 @@ STAGE PLANS: key expressions: _col2 (type: bigint) sort order: + Map-reduce partition columns: _col2 (type: bigint) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator @@ -1083,101 +1255,91 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: 
Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '8') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Demux Operator + Statistics: Num rows: 84 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 42 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: b + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > '8') (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - 
Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1203,12 +1365,9 @@ having p_name in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-5 depends on stages: Stage-1, Stage-3 , consists of Stage-6, Stage-2 - Stage-6 has a backup stage: Stage-2 - Stage-4 depends on stages: Stage-6 - Stage-2 + Stage-2 depends on stages: Stage-1, Stage-3 Stage-3 is a root stage - Stage-0 depends on stages: Stage-4, Stage-2 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1217,8 +1376,9 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_name, p_mfgr, p_size Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) @@ -1250,45 +1410,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 - Conditional Operator - - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Stage: Stage-2 Map Reduce Map Operator Tree: @@ -1306,21 +1427,47 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Demux Operator + Statistics: Num rows: 22 Data size: 4653 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 2326 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 33 Data size: 6979 Basic stats: 
COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Map Reduce @@ -1359,24 +1506,21 @@ STAGE PLANS: window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(MAX)~ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: first_value_window_0 is not null (type: boolean) + Select Operator + expressions: first_value_window_0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: first_value_window_0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: 
Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/subquery_notexists.q.out b/ql/src/test/results/clientpositive/subquery_notexists.q.out index ede7855..6ec3b46 100644 --- a/ql/src/test/results/clientpositive/subquery_notexists.q.out +++ b/ql/src/test/results/clientpositive/subquery_notexists.q.out @@ -19,11 +19,14 @@ where not exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -31,13 +34,34 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -45,25 +69,100 @@ STAGE PLANS: predicate: (value > 'val_2') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce 
partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) - 1 _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) + predicate: _col4 is null (type: boolean) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) @@ -243,11 +342,46 @@ where not exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: 
_col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-2 Map Reduce Map Operator Tree: @@ -258,35 +392,80 @@ STAGE PLANS: predicate: (value > 'val_2') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 
(type: string), _col0 (type: string) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -306,17 +485,18 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator diff --git a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out index 9349f2d..5948f9a 100644 --- a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out @@ -22,7 +22,10 @@ having not exists POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -69,31 +72,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 
Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value > 'val_12') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) - 1 _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) + predicate: _col4 is null (type: boolean) Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) @@ -107,6 +102,115 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value > 'val_12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + 
sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -173,8 +277,10 @@ having not exists POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -227,17 +333,18 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -255,6 +362,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + 
Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -265,35 +414,80 @@ STAGE PLANS: predicate: (value > 'val_12') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition 
columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.out index 804f411..7071927 100644 --- a/ql/src/test/results/clientpositive/subquery_notin_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- non agg, non corr explain @@ -24,8 +24,9 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: 
Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 - Stage-3 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2, Stage-5 Stage-4 is a root stage + Stage-5 is a root stage Stage-0 depends on stages: Stage-3 STAGE PLANS: @@ -76,7 +77,8 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -84,8 +86,8 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4906 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 6906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -101,23 +103,15 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 4906 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 6906 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '12') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator condition map: @@ -125,18 +119,18 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 137 Data size: 2688 Basic stats: COMPLETE Column stats: NONE + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 2688 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 2688 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -147,38 +141,67 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: 
COMPLETE - Filter Operator - predicate: false (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(key) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count() + keys: _col0 (type: string), true (type: boolean) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 
1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -186,7 +209,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- non agg, corr explain select b.p_mfgr, min(p_retailprice) @@ -212,11 +234,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-5 - 
Stage-3 depends on stages: Stage-2, Stage-6 - Stage-4 is a root stage + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3, Stage-7 Stage-5 depends on stages: Stage-4 Stage-6 is a root stage - Stage-0 depends on stages: Stage-3 + Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -260,69 +283,46 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Reduce Operator 
Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: double) 1 _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 2087 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col4 (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not CASE WHEN (_col2 is not null) THEN (true) WHEN (_col0 is null) THEN (null) ELSE (false) END) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -352,84 +352,120 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col2 - _col1) > 600.0) and (_col0 is null or _col1 is null)) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col2 - _col1) > 600.0) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: double) + sort order: + + Map-reduce partition columns: _col1 (type: double) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key 
expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: double) + 1 _col0 (type: double) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col3 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double) + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: double) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Map Reduce Map Operator Tree: TableScan - alias: part + alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_mfgr (type: string), p_retailprice (type: double) outputColumnNames: p_mfgr, p_retailprice Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(p_retailprice), max(p_retailprice) + aggregations: min(p_retailprice) keys: p_mfgr (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1) + aggregations: min(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic 
stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col2 - _col1) > 600.0) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: double) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -437,13 +473,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select b.p_mfgr, min(p_retailprice) from part b group by b.p_mfgr @@ -466,9 +523,7 @@ having b.p_mfgr not in POSTHOOK: type: 
QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#1 1173.15 -Manufacturer#2 1690.68 -Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- agg, non corr explain select b.p_mfgr, min(p_retailprice) @@ -551,6 +606,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -558,7 +614,7 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -576,13 +632,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator condition map: @@ -590,10 +647,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col5 Statistics: Num rows: 14 Data size: 2087 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) 
WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double) @@ -613,52 +670,47 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_retailprice (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col1), min(_col1) - keys: null (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(p_retailprice), min(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) Reduce 
Operator Tree: Group By Operator aggregations: max(VALUE._col0), min(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double), _col2 (type: double) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) > 600.0) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) > 600.0) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Map Reduce @@ 
-667,24 +719,19 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Map Reduce @@ -703,8 +750,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), true (type: boolean) + sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double) @@ -722,12 +769,17 @@ STAGE PLANS: expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: 
NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -735,7 +787,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select b.p_mfgr, min(p_retailprice) from part b group by b.p_mfgr diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index c7e1f02..61f0d42 100644 --- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -41,60 +41,147 @@ POSTHOOK: query: -- non agg, corr explain select * from src11 where src11.key1 in (select key from src where src11.value1 = value and key > '9') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: src11 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE - Filter Operator - predicate: ((key1 > '9') and value1 is not null) (type: boolean) + Select Operator + expressions: value1 (type: string) + outputColumnNames: value1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key1 (type: string), value1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + keys: value1 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - 
outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src11 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key1 (type: string), value1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -111,60 +198,147 @@ PREHOOK: type: QUERY 
POSTHOOK: query: explain select * from src a where a.key in (select key from src where a.value = value and key > '9') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data 
size: 1455 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -196,8 +370,11 @@ from part b where b.p_size in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 + Stage-6 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -207,15 +384,12 @@ STAGE PLANS: TableScan alias: part2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: p2_mfgr is not null (type: boolean) + Reduce Output Operator + key expressions: p2_mfgr (type: string), p2_size (type: int) + sort order: ++ + Map-reduce partition columns: p2_mfgr (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: p2_mfgr (type: string), p2_size (type: int) - sort order: ++ - Map-reduce partition columns: p2_mfgr (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator 
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) @@ -249,18 +423,12 @@ STAGE PLANS: expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -272,21 +440,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num 
rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: int) + aggregations: min(_col1) + keys: _col2 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -294,38 +472,90 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_mfgr is not null and p_size is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b 
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -340,6 +570,39 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: 
_col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -364,8 +627,11 @@ from part b where b.p_size in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 + Stage-6 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -375,15 +641,12 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) @@ -417,18 +680,12 @@ STAGE PLANS: expressions: _col2 (type: string), _col5 (type: int) 
outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -440,21 +697,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + 
outputColumnNames: _col2, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: int) + aggregations: min(_col1) + keys: _col2 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -462,38 +729,90 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column 
stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_mfgr is not null and p_size is not null) (type: boolean) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) + key 
expressions: _col1 (type: string), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -508,6 +827,39 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -535,84 +887,152 @@ where b.key in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: src + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: key (type: string), value (type: string) + keys: value (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: b + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort 
order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data 
size: 722 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -640,8 +1060,11 @@ having count(*) in (select count(*) from src where src.key > '9' and src.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: 
Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-6 is a root stage + Stage-3 depends on stages: Stage-6 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -651,25 +1074,22 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + Group By Operator + aggregations: count() + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -677,19 +1097,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num 
rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -703,17 +1120,17 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) + key expressions: _col1 (type: string), _col0 (type: bigint) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: bigint) - 1 _col0 (type: string), _col1 (type: bigint) + 1 _col1 (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 
2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -724,6 +1141,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -731,53 +1190,108 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: string), 
_col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -785,7 +1299,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- non agg, corr explain select p_mfgr, b.p_name, p_size @@ 
-807,15 +1320,15 @@ where b.p_name not in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2, Stage-5 Stage-4 depends on stages: Stage-3 Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1, Stage-5 Stage-5 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -855,47 +1368,81 @@ STAGE PLANS: isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -908,121 +1455,78 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3381 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: 
- TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3381 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + value expressions: _col2 (type: int) TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 28 Data size: 3719 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col4 is null (type: boolean) - Statistics: Num rows: 14 Data size: 1859 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1859 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 1859 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col5 
(type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not CASE WHEN (_col3 is not null) THEN (true) WHEN (_col0 is null) THEN (null) ELSE (false) END) (type: boolean) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: part + alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: p_name (type: string) + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), 
KEY.reducesinkkey1 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator