diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9064e49..e48f215 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1065,7 +1065,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;" + " expressed as multiple of Local FS read cost"), AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"), - SEMIJOIN_CONVERSION("hive.enable.semijoin.conversion", true, "convert group by followed by inner equi join into semijoin"), + SEMIJOIN_CONVERSION("hive.enable.semijoin.conversion", false, "convert group by followed by inner equi join into semijoin"), HIVE_COLUMN_ALIGNMENT("hive.order.columnalignment", true, "Flag to control whether we want to try to align" + "columns in operators such as Aggregate or Join so that we try to reduce the number of shuffling stages"), diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index aa3d72d..eeff3eb 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -24,6 +24,9 @@ minimr.query.files=infer_bucket_sort_map_operators.q,\ # Tests that are not enabled for CLI Driver disabled.query.files=ql_rewrite_gbtoidx.q,\ ql_rewrite_gbtoidx_cbo_1.q,\ + cbo_rp_subq_in.q,\ + cbo_rp_subq_not_in.q,\ + cbo_rp_subq_exists.q,\ ql_rewrite_gbtoidx_cbo_2.q,\ rcfile_merge1.q,\ smb_mapjoin_8.q,\ @@ -228,6 +231,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ stats_only_null.q,\ subquery_exists.q,\ subquery_in.q,\ + subquery_restrictions.q,\ temp_table.q,\ tez_bmj_schema_evolution.q,\ tez_dml.q,\ @@ -472,7 +476,6 @@ minillaplocal.query.files=acid_globallimit.q,\ cbo_rp_join.q,\ 
cbo_rp_lineage2.q,\ cbo_rp_semijoin.q,\ - cbo_rp_subq_not_in.q,\ cbo_rp_unionDistinct_2.q,\ cbo_rp_windowing_2.q,\ cbo_subq_not_in.q,\ @@ -566,6 +569,8 @@ minillaplocal.query.files=acid_globallimit.q,\ special_character_in_tabnames_1.q,\ stats_based_fetch_decision.q,\ subquery_notin.q,\ + subquery_nested_subquery.q, \ + subquery_shared_alias.q, \ table_access_keys_stats.q,\ tez_bmj_schema_evolution.q,\ tez_dml.q,\ @@ -648,11 +653,11 @@ minillaplocal.query.files=acid_globallimit.q,\ offset_limit_ppd_optimizer.q,\ cluster.q,\ subquery_in.q,\ + subquery_restrictions.q,\ stats11.q,\ orc_create.q,\ orc_split_elimination.q,\ order_null.q,\ - cbo_rp_subq_in.q,\ skewjoinopt15.q,\ authorization_2.q,\ cbo_subq_in.q,\ @@ -1327,6 +1332,7 @@ spark.query.files=add_part_multiple.q, \ statsfs.q, \ subquery_exists.q, \ subquery_in.q, \ + subquery_restrictions.q, \ subquery_multiinsert.q, \ table_access_keys_stats.q, \ temp_table.q, \ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java new file mode 100644 index 0000000..645efe6 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.lib; + +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; + +public class ExpressionWalker extends DefaultGraphWalker { + + /** + * Constructor. + * + * @param disp + * dispatcher to call for each op encountered + */ + public ExpressionWalker (Dispatcher disp) { + super(disp); + } + + + /** + * We should bypass subquery since we have already processed and created logical plan + * (in genLogicalPlan) for subquery at this point. + * SubQueryExprProcessor will use generated plan and creates appropriate ExprNodeSubQueryDesc. + */ + private boolean shouldByPass(Node childNode, Node parentNode) { + if(parentNode instanceof ASTNode && ((ASTNode)parentNode).getType() == HiveParser.TOK_SUBQUERY_EXPR ) + { + ASTNode parentOp = (ASTNode)parentNode; + //subquery either in WHERE IN form OR WHERE EXISTS form + //in first case LHS should not be bypassed + assert(parentOp.getChildCount() == 2 || parentOp.getChildCount()==3); + if(parentOp.getChildCount() == 3 && (ASTNode)childNode == parentOp.getChild(2) ) + { + return false; + } + return true; + } + return false; + } + /** + * walk the current operator and its descendants. + * + * @param nd + * current operator in the graph + * @throws SemanticException + */ + protected void walk(Node nd) throws SemanticException { + // Push the node in the stack + opStack.push(nd); + + // While there are still nodes to dispatch... 
+ while (!opStack.empty()) { + Node node = opStack.peek(); + + if (node.getChildren() == null || + getDispatchedList().containsAll(node.getChildren())) { + // Dispatch current node + if (!getDispatchedList().contains(node)) { + dispatch(node, opStack); + opQueue.add(node); + } + opStack.pop(); + continue; + } + + // Add a single child and restart the loop + for (Node childNode : node.getChildren()) { + if (!getDispatchedList().contains(childNode)) { + if(shouldByPass(childNode, node)) + { + retMap.put(childNode, null); + } + else { + opStack.push(childNode); + } + break; + } + } + } // end while + } + +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttle.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttle.java new file mode 100644 index 0000000..86bebeb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttle.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttle; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; + +/** + * Visitor that extends {@link RelShuttle} with visit methods for Hive relational expressions. + * This is required for HiveRelDecorrelation: because plans contain a mix of + * HiveProject, LogicalProject, etc., we need an interface that can handle all of them. + */ +public interface HiveRelShuttle extends RelShuttle { + + RelNode visit(HiveProject project); + RelNode visit(HiveFilter filter); + RelNode visit(HiveJoin join); + RelNode visit(HiveAggregate aggregate); +} + +// End HiveRelShuttle.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java new file mode 100644 index 0000000..b92a4f3 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.TableFunctionScan; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalCorrelate; +import org.apache.calcite.rel.logical.LogicalExchange; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rel.logical.LogicalIntersect; +import org.apache.calcite.rel.logical.LogicalJoin; +import org.apache.calcite.rel.logical.LogicalMinus; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rel.logical.LogicalUnion; +import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.util.Stacks; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; + +import java.util.ArrayList; +import java.util.List; + +public class HiveRelShuttleImpl implements HiveRelShuttle { + protected final List stack = new ArrayList(); + + /** + * Visits a particular child of a parent. 
+ */ + protected RelNode visitChild(RelNode parent, int i, RelNode child) { + Stacks.push(stack, parent); + try { + RelNode child2 = child.accept(this); + if (child2 != child) { + final List newInputs = + new ArrayList(parent.getInputs()); + newInputs.set(i, child2); + return parent.copy(parent.getTraitSet(), newInputs); + } + return parent; + } finally { + Stacks.pop(stack, parent); + } + } + + protected RelNode visitChildren(RelNode rel) { + for (Ord input : Ord.zip(rel.getInputs())) { + rel = visitChild(rel, input.i, input.e); + } + return rel; + } + + public RelNode visit(LogicalAggregate aggregate) { + return visitChild(aggregate, 0, aggregate.getInput()); + } + + public RelNode visit(HiveAggregate aggregate) { + return visitChild(aggregate, 0, aggregate.getInput()); + } + + public RelNode visit(TableScan scan) { + return scan; + } + + public RelNode visit(TableFunctionScan scan) { + return visitChildren(scan); + } + + public RelNode visit(LogicalValues values) { + return values; + } + + public RelNode visit(HiveFilter filter) { + return visitChild(filter, 0, filter.getInput()); + } + public RelNode visit(LogicalFilter filter) { + return visitChild(filter, 0, filter.getInput()); + } + + public RelNode visit(HiveProject project) { + return visitChild(project, 0, project.getInput()); + } + + public RelNode visit(LogicalProject project) { + return visitChild(project, 0, project.getInput()); + } + + public RelNode visit(LogicalJoin join) { + return visitChildren(join); + } + + public RelNode visit(HiveJoin join) { + return visitChildren(join); + } + + public RelNode visit(LogicalCorrelate correlate) { + return visitChildren(correlate); + } + + public RelNode visit(LogicalUnion union) { + return visitChildren(union); + } + + public RelNode visit(LogicalIntersect intersect) { + return visitChildren(intersect); + } + + public RelNode visit(LogicalMinus minus) { + return visitChildren(minus); + } + + public RelNode visit(LogicalSort sort) { + return 
visitChildren(sort); + } + + public RelNode visit(LogicalExchange exchange) { + return visitChildren(exchange); + } + + public RelNode visit(RelNode other) { + return visitChildren(other); + } +} + +// End HiveRelShuttleImpl.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveReplicatedRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveReplicatedRelBuilder.java new file mode 100644 index 0000000..9ce175d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveReplicatedRelBuilder.java @@ -0,0 +1,1680 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.plan.Context; +import org.apache.calcite.plan.Contexts; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.CorrelationId; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.server.CalciteServerStatement; +import org.apache.calcite.sql.SemiJoinType; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.validate.SqlValidatorUtil; +import org.apache.calcite.tools.RelBuilder; +import 
org.apache.calcite.tools.RelBuilderFactory; +import org.apache.calcite.tools.FrameworkConfig; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.util.CompositeList; +import org.apache.calcite.util.Holder; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; +import org.apache.calcite.util.Litmus; +import org.apache.calcite.util.NlsString; +import org.apache.calcite.util.Pair; +import org.apache.calcite.util.Static; +import org.apache.calcite.util.Util; +import org.apache.calcite.util.mapping.Mapping; +import org.apache.calcite.util.mapping.Mappings; + +import com.google.common.base.Function; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; + +import java.math.BigDecimal; +import java.util.AbstractList; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +/** + * Builder for relational expressions. + * TODO: + * Note that this is copied from Calcite's RelBuilder + * because CALCITE-1493 hasn't been fixed yet. + * This should be deleted and replaced with RelBuilder in SubqueryRemoveRule + * once CALCITE-1493 is fixed. + * + *

{@code RelBuilder} does not make possible anything that you could not + * also accomplish by calling the factory methods of the particular relational + * expression. But it makes common tasks more straightforward and concise. + * + *

{@code RelBuilder} uses factories to create relational expressions. + * By default, it uses the default factories, which create logical relational + * expressions ({@link org.apache.calcite.rel.logical.LogicalFilter}, + * {@link org.apache.calcite.rel.logical.LogicalProject} and so forth). + * But you could override those factories so that, say, {@code filter} creates + * instead a {@code HiveFilter}. + * + *

It is not thread-safe. + */ +public class HiveReplicatedRelBuilder { + private static final Function FN_TYPE = + new Function() { + public String apply(RexNode input) { + return input + ": " + input.getType(); + } + }; + + protected final RelOptCluster cluster; + protected final RelOptSchema relOptSchema; + private final RelFactories.FilterFactory filterFactory; + private final RelFactories.ProjectFactory projectFactory; + private final RelFactories.AggregateFactory aggregateFactory; + private final RelFactories.SortFactory sortFactory; + private final RelFactories.SetOpFactory setOpFactory; + private final RelFactories.JoinFactory joinFactory; + private final RelFactories.SemiJoinFactory semiJoinFactory; + private final RelFactories.CorrelateFactory correlateFactory; + private final RelFactories.ValuesFactory valuesFactory; + private final RelFactories.TableScanFactory scanFactory; + private final Deque stack = new ArrayDeque<>(); + + public HiveReplicatedRelBuilder(Context context, RelOptCluster cluster, + RelOptSchema relOptSchema) { + this.cluster = cluster; + this.relOptSchema = relOptSchema; + if (context == null) { + context = Contexts.EMPTY_CONTEXT; + } + this.aggregateFactory = + Util.first(context.unwrap(RelFactories.AggregateFactory.class), + RelFactories.DEFAULT_AGGREGATE_FACTORY); + this.filterFactory = + Util.first(context.unwrap(RelFactories.FilterFactory.class), + RelFactories.DEFAULT_FILTER_FACTORY); + this.projectFactory = + Util.first(context.unwrap(RelFactories.ProjectFactory.class), + RelFactories.DEFAULT_PROJECT_FACTORY); + this.sortFactory = + Util.first(context.unwrap(RelFactories.SortFactory.class), + RelFactories.DEFAULT_SORT_FACTORY); + this.setOpFactory = + Util.first(context.unwrap(RelFactories.SetOpFactory.class), + RelFactories.DEFAULT_SET_OP_FACTORY); + this.joinFactory = + Util.first(context.unwrap(RelFactories.JoinFactory.class), + RelFactories.DEFAULT_JOIN_FACTORY); + this.semiJoinFactory = + 
Util.first(context.unwrap(RelFactories.SemiJoinFactory.class), + RelFactories.DEFAULT_SEMI_JOIN_FACTORY); + this.correlateFactory = + Util.first(context.unwrap(RelFactories.CorrelateFactory.class), + RelFactories.DEFAULT_CORRELATE_FACTORY); + this.valuesFactory = + Util.first(context.unwrap(RelFactories.ValuesFactory.class), + RelFactories.DEFAULT_VALUES_FACTORY); + this.scanFactory = + Util.first(context.unwrap(RelFactories.TableScanFactory.class), + RelFactories.DEFAULT_TABLE_SCAN_FACTORY); + } + + /** Creates a RelBuilder. */ + public static HiveReplicatedRelBuilder create(FrameworkConfig config) { + final RelOptCluster[] clusters = {null}; + final RelOptSchema[] relOptSchemas = {null}; + Frameworks.withPrepare( + new Frameworks.PrepareAction(config) { + public Void apply(RelOptCluster cluster, RelOptSchema relOptSchema, + SchemaPlus rootSchema, CalciteServerStatement statement) { + clusters[0] = cluster; + relOptSchemas[0] = relOptSchema; + return null; + } + }); + return new HiveReplicatedRelBuilder(config.getContext(), clusters[0], relOptSchemas[0]); + } + + /** Returns the type factory. */ + public RelDataTypeFactory getTypeFactory() { + return cluster.getTypeFactory(); + } + + /** Returns the builder for {@link RexNode} expressions. */ + public RexBuilder getRexBuilder() { + return cluster.getRexBuilder(); + } + + /** Adds a relational expression to be the input to the next relational + * expression constructed. + * + *

This method is useful when you want to weave in relational expressions + * that are not supported by the builder. If, while creating such expressions, + * you need to use previously built expressions as inputs, call + * {@link #build()} to pop those inputs. */ + public HiveReplicatedRelBuilder push(RelNode node) { + stack.push(new Frame(node)); + return this; + } + + /** Pushes a collection of relational expressions. */ + public HiveReplicatedRelBuilder pushAll(Iterable nodes) { + for (RelNode node : nodes) { + push(node); + } + return this; + } + + /** Returns the final relational expression. + * + *

Throws if the stack is empty. + */ + public RelNode build() { + return stack.pop().rel; + } + + /** Returns the relational expression at the top of the stack, but does not + * remove it. */ + public RelNode peek() { + return peek_().rel; + } + + private Frame peek_() { + return stack.peek(); + } + + /** Returns the relational expression {@code n} positions from the top of the + * stack, but does not remove it. */ + public RelNode peek(int n) { + return peek_(n).rel; + } + + private Frame peek_(int n) { + return Iterables.get(stack, n); + } + + /** Returns the relational expression {@code n} positions from the top of the + * stack, but does not remove it. */ + public RelNode peek(int inputCount, int inputOrdinal) { + return peek_(inputCount, inputOrdinal).rel; + } + + private Frame peek_(int inputCount, int inputOrdinal) { + return peek_(inputCount - 1 - inputOrdinal); + } + + /** Returns the number of fields in all inputs before (to the left of) + * the given input. + * + * @param inputCount Number of inputs + * @param inputOrdinal Input ordinal + */ + private int inputOffset(int inputCount, int inputOrdinal) { + int offset = 0; + for (int i = 0; i < inputOrdinal; i++) { + offset += peek(inputCount, i).getRowType().getFieldCount(); + } + return offset; + } + + // Methods that return scalar expressions + + /** Creates a literal (constant expression). 
*/ + public RexNode literal(Object value) { + final RexBuilder rexBuilder = cluster.getRexBuilder(); + if (value == null) { + return rexBuilder.constantNull(); + } else if (value instanceof Boolean) { + return rexBuilder.makeLiteral((Boolean) value); + } else if (value instanceof BigDecimal) { + return rexBuilder.makeExactLiteral((BigDecimal) value); + } else if (value instanceof Float || value instanceof Double) { + return rexBuilder.makeApproxLiteral( + BigDecimal.valueOf(((Number) value).doubleValue())); + } else if (value instanceof Number) { + return rexBuilder.makeExactLiteral( + BigDecimal.valueOf(((Number) value).longValue())); + } else if (value instanceof String) { + return rexBuilder.makeLiteral((String) value); + } else { + throw new IllegalArgumentException("cannot convert " + value + + " (" + value.getClass() + ") to a constant"); + } + } + + /** Creates a reference to a field by name. + * + *

Equivalent to {@code field(1, 0, fieldName)}. + * + * @param fieldName Field name + */ + public RexInputRef field(String fieldName) { + return field(1, 0, fieldName); + } + + /** Creates a reference to a field of given input relational expression + * by name. + * + * @param inputCount Number of inputs + * @param inputOrdinal Input ordinal + * @param fieldName Field name + */ + public RexInputRef field(int inputCount, int inputOrdinal, String fieldName) { + final Frame frame = peek_(inputCount, inputOrdinal); + final List fieldNames = Pair.left(frame.fields()); + int i = fieldNames.indexOf(fieldName); + if (i >= 0) { + return field(inputCount, inputOrdinal, i); + } else { + throw new IllegalArgumentException("field [" + fieldName + + "] not found; input fields are: " + fieldNames); + } + } + + /** Creates a reference to an input field by ordinal. + * + *

Equivalent to {@code field(1, 0, ordinal)}. + * + * @param fieldOrdinal Field ordinal + */ + public RexInputRef field(int fieldOrdinal) { + return (RexInputRef) field(1, 0, fieldOrdinal, false); + } + + /** Creates a reference to a field of a given input relational expression + * by ordinal. + * + * @param inputCount Number of inputs + * @param inputOrdinal Input ordinal + * @param fieldOrdinal Field ordinal within input + */ + public RexInputRef field(int inputCount, int inputOrdinal, int fieldOrdinal) { + return (RexInputRef) field(inputCount, inputOrdinal, fieldOrdinal, false); + } + + /** As {@link #field(int, int, int)}, but if {@code alias} is true, the method + * may apply an alias to make sure that the field has the same name as in the + * input frame. If no alias is applied the expression is definitely a + * {@link RexInputRef}. Throws IllegalArgumentException if fieldOrdinal is negative or not less than the input's field count. */ + private RexNode field(int inputCount, int inputOrdinal, int fieldOrdinal, + boolean alias) { + final Frame frame = peek_(inputCount, inputOrdinal); + final RelNode input = frame.rel; + final RelDataType rowType = input.getRowType(); + if (fieldOrdinal < 0 || fieldOrdinal >= rowType.getFieldCount()) { + throw new IllegalArgumentException("field ordinal [" + fieldOrdinal + + "] out of range; input fields are: " + rowType.getFieldNames()); + } + final RelDataTypeField field = rowType.getFieldList().get(fieldOrdinal); + final int offset = inputOffset(inputCount, inputOrdinal); + final RexInputRef ref = cluster.getRexBuilder() + .makeInputRef(field.getType(), offset + fieldOrdinal); + final RelDataTypeField aliasField = frame.fields().get(fieldOrdinal); + if (!alias || field.getName().equals(aliasField.getName())) { + return ref; + } else { + return alias(ref, aliasField.getName()); + } + } + + /** Creates a reference to a field of the current record which originated + * in a relation with a given alias. 
*/ + public RexNode field(String alias, String fieldName) { + Preconditions.checkNotNull(alias); + Preconditions.checkNotNull(fieldName); + final Frame frame = stack.peek(); + final List aliases = new ArrayList<>(); + int offset = 0; + for (Pair pair : frame.right) { + if (pair.left != null && pair.left.equals(alias)) { + int i = pair.right.getFieldNames().indexOf(fieldName); + if (i >= 0) { + return field(offset + i); + } else { + throw new IllegalArgumentException("no field '" + fieldName + + "' in relation '" + alias + + "'; fields are: " + pair.right.getFieldNames()); + } + } + aliases.add(pair.left); + offset += pair.right.getFieldCount(); + } + throw new IllegalArgumentException("no relation with alias '" + alias + + "'; aliases are: " + aliases); + } + + /** Returns references to the fields of the top input. */ + public ImmutableList fields() { + return fields(1, 0); + } + + /** Returns references to the fields of a given input. */ + public ImmutableList fields(int inputCount, int inputOrdinal) { + final RelNode input = peek(inputCount, inputOrdinal); + final RelDataType rowType = input.getRowType(); + final ImmutableList.Builder nodes = ImmutableList.builder(); + for (int fieldOrdinal : Util.range(rowType.getFieldCount())) { + nodes.add(field(inputCount, inputOrdinal, fieldOrdinal)); + } + return nodes.build(); + } + + /** Returns references to fields for a given collation. 
*/ + public ImmutableList fields(RelCollation collation) { + final ImmutableList.Builder nodes = ImmutableList.builder(); + for (RelFieldCollation fieldCollation : collation.getFieldCollations()) { + RexNode node = field(fieldCollation.getFieldIndex()); + switch (fieldCollation.direction) { + case DESCENDING: + node = desc(node); + } + switch (fieldCollation.nullDirection) { + case FIRST: + node = nullsFirst(node); + break; + case LAST: + node = nullsLast(node); + break; + } + nodes.add(node); + } + return nodes.build(); + } + + /** Returns references to fields for a given list of input ordinals. */ + public ImmutableList fields(List ordinals) { + final ImmutableList.Builder nodes = ImmutableList.builder(); + for (Number ordinal : ordinals) { + RexNode node = field(1, 0, ordinal.intValue(), true); + nodes.add(node); + } + return nodes.build(); + } + + /** Returns references to fields identified by name. */ + public ImmutableList fields(Iterable fieldNames) { + final ImmutableList.Builder builder = ImmutableList.builder(); + for (String fieldName : fieldNames) { + builder.add(field(fieldName)); + } + return builder.build(); + } + + /** Returns references to fields identified by a mapping. */ + public ImmutableList fields(Mappings.TargetMapping mapping) { + return fields(Mappings.asList(mapping)); + } + + /** Creates an access to a field by name. */ + public RexNode dot(RexNode node, String fieldName) { + final RexBuilder builder = cluster.getRexBuilder(); + return builder.makeFieldAccess(node, fieldName, true); + } + + /** Creates an access to a field by ordinal. */ + public RexNode dot(RexNode node, int fieldOrdinal) { + final RexBuilder builder = cluster.getRexBuilder(); + return builder.makeFieldAccess(node, fieldOrdinal); + } + + /** Creates a call to a scalar operator. */ + public RexNode call(SqlOperator operator, RexNode... 
operands) { + final RexBuilder builder = cluster.getRexBuilder(); + final List operandList = ImmutableList.copyOf(operands); + final RelDataType type = builder.deriveReturnType(operator, operandList); + if (type == null) { + throw new IllegalArgumentException("cannot derive type: " + operator + + "; operands: " + Lists.transform(operandList, FN_TYPE)); + } + return builder.makeCall(type, operator, operandList); + } + + /** Creates a call to a scalar operator. */ + public RexNode call(SqlOperator operator, + Iterable operands) { + return cluster.getRexBuilder().makeCall(operator, + ImmutableList.copyOf(operands)); + } + + /** Creates an AND. */ + public RexNode and(RexNode... operands) { + return and(ImmutableList.copyOf(operands)); + } + + /** Creates an AND. + * + *

Simplifies the expression a little: + * {@code e AND TRUE} becomes {@code e}; + * {@code e AND e2 AND NOT e} becomes {@code e2}. */ + public RexNode and(Iterable operands) { + return RexUtil.simplifyAnds(cluster.getRexBuilder(), operands); + } + + /** Creates an OR. */ + public RexNode or(RexNode... operands) { + return or(ImmutableList.copyOf(operands)); + } + + /** Creates an OR. */ + public RexNode or(Iterable operands) { + return RexUtil.composeDisjunction(cluster.getRexBuilder(), operands, false); + } + + /** Creates a NOT. */ + public RexNode not(RexNode operand) { + return call(SqlStdOperatorTable.NOT, operand); + } + + /** Creates an =. */ + public RexNode equals(RexNode operand0, RexNode operand1) { + return call(SqlStdOperatorTable.EQUALS, operand0, operand1); + } + + /** Creates a IS NULL. */ + public RexNode isNull(RexNode operand) { + return call(SqlStdOperatorTable.IS_NULL, operand); + } + + /** Creates a IS NOT NULL. */ + public RexNode isNotNull(RexNode operand) { + return call(SqlStdOperatorTable.IS_NOT_NULL, operand); + } + + /** Creates an expression that casts an expression to a given type. */ + public RexNode cast(RexNode expr, SqlTypeName typeName) { + final RelDataType type = cluster.getTypeFactory().createSqlType(typeName); + return cluster.getRexBuilder().makeCast(type, expr); + } + + /** Creates an expression that casts an expression to a type with a given name + * and precision or length. */ + public RexNode cast(RexNode expr, SqlTypeName typeName, int precision) { + final RelDataType type = + cluster.getTypeFactory().createSqlType(typeName, precision); + return cluster.getRexBuilder().makeCast(type, expr); + } + + /** Creates an expression that casts an expression to a type with a given + * name, precision and scale. 
*/ + public RexNode cast(RexNode expr, SqlTypeName typeName, int precision, + int scale) { + final RelDataType type = + cluster.getTypeFactory().createSqlType(typeName, precision, scale); + return cluster.getRexBuilder().makeCast(type, expr); + } + + /** + * Returns an expression wrapped in an alias. + * + * @see #project + */ + public RexNode alias(RexNode expr, String alias) { + return call(SqlStdOperatorTable.AS, expr, literal(alias)); + } + + /** Converts a sort expression to descending. */ + public RexNode desc(RexNode node) { + return call(SqlStdOperatorTable.DESC, node); + } + + /** Converts a sort expression to nulls last. */ + public RexNode nullsLast(RexNode node) { + return call(SqlStdOperatorTable.NULLS_LAST, node); + } + + /** Converts a sort expression to nulls first. */ + public RexNode nullsFirst(RexNode node) { + return call(SqlStdOperatorTable.NULLS_FIRST, node); + } + + // Methods that create group keys and aggregate calls + + /** Creates an empty group key. */ + public GroupKey groupKey() { + return groupKey(ImmutableList.of()); + } + + /** Creates a group key. */ + public GroupKey groupKey(RexNode... nodes) { + return groupKey(ImmutableList.copyOf(nodes)); + } + + /** Creates a group key. */ + public GroupKey groupKey(Iterable nodes) { + return new GroupKeyImpl(ImmutableList.copyOf(nodes), false, null, null); + } + + /** Creates a group key with grouping sets. */ + public GroupKey groupKey(Iterable nodes, boolean indicator, + Iterable> nodeLists) { + final ImmutableList.Builder> builder = + ImmutableList.builder(); + for (Iterable nodeList : nodeLists) { + builder.add(ImmutableList.copyOf(nodeList)); + } + return new GroupKeyImpl(ImmutableList.copyOf(nodes), indicator, builder.build(), null); + } + + /** Creates a group key of fields identified by ordinal. */ + public GroupKey groupKey(int... fieldOrdinals) { + return groupKey(fields(ImmutableIntList.of(fieldOrdinals))); + } + + /** Creates a group key of fields identified by name. 
*/ + public GroupKey groupKey(String... fieldNames) { + return groupKey(fields(ImmutableList.copyOf(fieldNames))); + } + + /** Creates a group key with grouping sets, both identified by field positions + * in the underlying relational expression. + * + *

This method of creating a group key does not allow you to group on new + * expressions, only column projections, but is efficient, especially when you + * are coming from an existing {@link Aggregate}. */ + public GroupKey groupKey(ImmutableBitSet groupSet, boolean indicator, + ImmutableList groupSets) { + if (groupSet.length() > peek().getRowType().getFieldCount()) { + throw new IllegalArgumentException("out of bounds: " + groupSet); + } + if (groupSets == null) { + groupSets = ImmutableList.of(groupSet); + } + final ImmutableList nodes = + fields(ImmutableIntList.of(groupSet.toArray())); + final List> nodeLists = + Lists.transform(groupSets, + new Function>() { + public ImmutableList apply(ImmutableBitSet input) { + return fields(ImmutableIntList.of(input.toArray())); + } + }); + return groupKey(nodes, indicator, nodeLists); + } + + /** Creates a call to an aggregate function. */ + public AggCall aggregateCall(SqlAggFunction aggFunction, boolean distinct, + RexNode filter, String alias, RexNode... operands) { + return aggregateCall(aggFunction, distinct, filter, alias, + ImmutableList.copyOf(operands)); + } + + /** Creates a call to an aggregate function. */ + public AggCall aggregateCall(SqlAggFunction aggFunction, boolean distinct, + RexNode filter, String alias, Iterable operands) { + if (filter != null) { + if (filter.getType().getSqlTypeName() != SqlTypeName.BOOLEAN) { + throw Static.RESOURCE.filterMustBeBoolean().ex(); + } + if (filter.getType().isNullable()) { + filter = call(SqlStdOperatorTable.IS_TRUE, filter); + } + } + return new AggCallImpl(aggFunction, distinct, filter, alias, + ImmutableList.copyOf(operands)); + } + + /** Creates a call to the COUNT aggregate function. */ + public AggCall count(boolean distinct, String alias, RexNode... operands) { + return aggregateCall(SqlStdOperatorTable.COUNT, distinct, null, alias, + operands); + } + + /** Creates a call to the COUNT(*) aggregate function. 
*/ + public AggCall countStar(String alias) { + return aggregateCall(SqlStdOperatorTable.COUNT, false, null, alias); + } + + /** Creates a call to the SUM aggregate function. */ + public AggCall sum(boolean distinct, String alias, RexNode operand) { + return aggregateCall(SqlStdOperatorTable.SUM, distinct, null, alias, + operand); + } + + /** Creates a call to the AVG aggregate function. */ + public AggCall avg(boolean distinct, String alias, RexNode operand) { + return aggregateCall( + SqlStdOperatorTable.AVG, distinct, null, alias, operand); + } + + /** Creates a call to the MIN aggregate function. */ + public AggCall min(String alias, RexNode operand) { + return aggregateCall(SqlStdOperatorTable.MIN, false, null, alias, operand); + } + + /** Creates a call to the MAX aggregate function. */ + public AggCall max(String alias, RexNode operand) { + return aggregateCall(SqlStdOperatorTable.MAX, false, null, alias, operand); + } + + // Methods that create relational expressions + + /** Creates a {@link org.apache.calcite.rel.core.TableScan} of the table + * with a given name. + * + *

Throws if the table does not exist. + * + *

Returns this builder. + * + * @param tableNames Name of table (can optionally be qualified) + */ + public HiveReplicatedRelBuilder scan(Iterable tableNames) { + final List names = ImmutableList.copyOf(tableNames); + final RelOptTable relOptTable = relOptSchema.getTableForMember(names); + if (relOptTable == null) { + throw Static.RESOURCE.tableNotFound(Joiner.on(".").join(names)).ex(); + } + final RelNode scan = scanFactory.createScan(cluster, relOptTable); + push(scan); + return this; + } + + /** Creates a {@link org.apache.calcite.rel.core.TableScan} of the table + * with a given name. + * + *

Throws if the table does not exist. + * + *

Returns this builder. + * + * @param tableNames Name of table (can optionally be qualified) + */ + public HiveReplicatedRelBuilder scan(String... tableNames) { + return scan(ImmutableList.copyOf(tableNames)); + } + + /** Creates a {@link org.apache.calcite.rel.core.Filter} of an array of + * predicates. + * + *

The predicates are combined using AND, + * and optimized in a similar way to the {@link #and} method. + * If the result is TRUE no filter is created. */ + public HiveReplicatedRelBuilder filter(RexNode... predicates) { + return filter(ImmutableList.copyOf(predicates)); + } + + /** Creates a {@link org.apache.calcite.rel.core.Filter} of a list of + * predicates. + * + *

The predicates are combined using AND, + * and optimized in a similar way to the {@link #and} method. + * If the result is TRUE no filter is created. */ + public HiveReplicatedRelBuilder filter(Iterable predicates) { + final RexNode x = RexUtil.simplifyAnds(cluster.getRexBuilder(), predicates, true); + if (x.isAlwaysFalse()) { + return empty(); + } + if (!x.isAlwaysTrue()) { + final Frame frame = stack.pop(); + final RelNode filter = filterFactory.createFilter(frame.rel, x); + stack.push(new Frame(filter, frame.right)); + } + return this; + } + + + /** Creates a {@link org.apache.calcite.rel.core.Project} of the given list + * of expressions. + * + *

Infers names as would {@link #project(Iterable, Iterable)} if all + * suggested names were null. + * + * @param nodes Expressions + */ + public HiveReplicatedRelBuilder project(Iterable nodes) { + return project(nodes, ImmutableList.of()); + } + + /** Creates a {@link org.apache.calcite.rel.core.Project} of the given list + * of expressions and field names. + * + *

Infers names as would {@link #project(Iterable, Iterable)} if all + * suggested names were null. + * + * @param nodes Expressions + * @param fieldNames field names for expressions + */ + public HiveReplicatedRelBuilder project(Iterable nodes, + Iterable fieldNames) { + return project(nodes, fieldNames, false); + } + + /** Creates a {@link org.apache.calcite.rel.core.Project} of the given list + * of expressions, using the given names. + * + *

Names are deduced as follows: + *

    + *
  • If the length of {@code fieldNames} is greater than the index of + * the current entry in {@code nodes}, and the entry in + * {@code fieldNames} is not null, uses it; otherwise + *
  • If an expression projects an input field, + * or is a cast of an input field, + * uses the input field name; otherwise + *
  • If an expression is a call to + * {@link org.apache.calcite.sql.fun.SqlStdOperatorTable#AS} + * (see {@link #alias}), removes the call but uses the intended alias. + *
+ * + *

After the field names have been inferred, makes the + * field names unique by appending numeric suffixes. + * + * @param nodes Expressions + * @param fieldNames Suggested field names + * @param force create project even if it is identity + */ + public HiveReplicatedRelBuilder project( + Iterable nodes, + Iterable fieldNames, + boolean force) { + final List names = new ArrayList<>(); + final List exprList = Lists.newArrayList(nodes); + final Iterator nameIterator = fieldNames.iterator(); + for (RexNode node : nodes) { + final String name = nameIterator.hasNext() ? nameIterator.next() : null; + final String name2 = inferAlias(exprList, node); + names.add(Util.first(name, name2)); + } + final RelDataType inputRowType = peek().getRowType(); + if (!force && RexUtil.isIdentity(exprList, inputRowType)) { + if (names.equals(inputRowType.getFieldNames())) { + // Do not create an identity project if it does not rename any fields + return this; + } else { + // create "virtual" row type for project only rename fields + final Frame frame = stack.pop(); + final RelDataType rowType = + RexUtil.createStructType(cluster.getTypeFactory(), exprList, + names, SqlValidatorUtil.F_SUGGESTER); + stack.push( + new Frame(frame.rel, + ImmutableList.of(Pair.of(frame.right.get(0).left, rowType)))); + return this; + } + } + final RelNode project = + projectFactory.createProject(build(), ImmutableList.copyOf(exprList), + names); + push(project); + return this; + } + + /** Creates a {@link org.apache.calcite.rel.core.Project} of the given + * expressions. */ + public HiveReplicatedRelBuilder project(RexNode... nodes) { + return project(ImmutableList.copyOf(nodes)); + } + + /** Infers the alias of an expression. + * + *

If the expression was created by {@link #alias}, replaces the expression + * in the project list. + */ + private String inferAlias(List exprList, RexNode expr) { + switch (expr.getKind()) { + case INPUT_REF: + final RexInputRef ref = (RexInputRef) expr; + return peek(0).getRowType().getFieldNames().get(ref.getIndex()); + case CAST: + return inferAlias(exprList, ((RexCall) expr).getOperands().get(0)); + case AS: + final RexCall call = (RexCall) expr; + for (;;) { + final int i = exprList.indexOf(expr); + if (i < 0) { + break; + } + exprList.set(i, call.getOperands().get(0)); + } + return ((NlsString) ((RexLiteral) call.getOperands().get(1)).getValue()) + .getValue(); + default: + return null; + } + } + + /** Creates an {@link org.apache.calcite.rel.core.Aggregate} that makes the + * relational expression distinct on all fields. */ + public HiveReplicatedRelBuilder distinct() { + return aggregate(groupKey(fields())); + } + + /** Creates an {@link org.apache.calcite.rel.core.Aggregate} with an array of + * calls. */ + public HiveReplicatedRelBuilder aggregate(GroupKey groupKey, AggCall... aggCalls) { + return aggregate(groupKey, ImmutableList.copyOf(aggCalls)); + } + + /** Creates an {@link org.apache.calcite.rel.core.Aggregate} with a list of + * calls. 
*/ + public HiveReplicatedRelBuilder aggregate(GroupKey groupKey, Iterable aggCalls) { + final RelDataType inputRowType = peek().getRowType(); + final List extraNodes = projects(inputRowType); + final GroupKeyImpl groupKey_ = (GroupKeyImpl) groupKey; + final ImmutableBitSet groupSet = + ImmutableBitSet.of(registerExpressions(extraNodes, groupKey_.nodes)); + final ImmutableList groupSets; + if (groupKey_.nodeLists != null) { + final int sizeBefore = extraNodes.size(); + final SortedSet groupSetSet = + new TreeSet<>(ImmutableBitSet.ORDERING); + for (ImmutableList nodeList : groupKey_.nodeLists) { + final ImmutableBitSet groupSet2 = + ImmutableBitSet.of(registerExpressions(extraNodes, nodeList)); + if (!groupSet.contains(groupSet2)) { + throw new IllegalArgumentException("group set element " + nodeList + + " must be a subset of group key"); + } + groupSetSet.add(groupSet2); + } + groupSets = ImmutableList.copyOf(groupSetSet); + if (extraNodes.size() > sizeBefore) { + throw new IllegalArgumentException( + "group sets contained expressions not in group key: " + + extraNodes.subList(sizeBefore, extraNodes.size())); + } + } else { + groupSets = ImmutableList.of(groupSet); + } + for (AggCall aggCall : aggCalls) { + if (aggCall instanceof AggCallImpl) { + final AggCallImpl aggCall1 = (AggCallImpl) aggCall; + registerExpressions(extraNodes, aggCall1.operands); + if (aggCall1.filter != null) { + registerExpression(extraNodes, aggCall1.filter); + } + } + } + if (extraNodes.size() > inputRowType.getFieldCount()) { + project(extraNodes); + } + final RelNode r = build(); + final List aggregateCalls = new ArrayList<>(); + for (AggCall aggCall : aggCalls) { + final AggregateCall aggregateCall; + if (aggCall instanceof AggCallImpl) { + final AggCallImpl aggCall1 = (AggCallImpl) aggCall; + final List args = registerExpressions(extraNodes, aggCall1.operands); + final int filterArg = aggCall1.filter == null ? 
-1 + : registerExpression(extraNodes, aggCall1.filter); + aggregateCall = + AggregateCall.create(aggCall1.aggFunction, aggCall1.distinct, args, + filterArg, groupSet.cardinality(), r, null, aggCall1.alias); + } else { + aggregateCall = ((AggCallImpl2) aggCall).aggregateCall; + } + aggregateCalls.add(aggregateCall); + } + + assert ImmutableBitSet.ORDERING.isStrictlyOrdered(groupSets) : groupSets; + for (ImmutableBitSet set : groupSets) { + assert groupSet.contains(set); + } + RelNode aggregate = aggregateFactory.createAggregate(r, + groupKey_.indicator, groupSet, groupSets, aggregateCalls); + push(aggregate); + return this; + } + + private List projects(RelDataType inputRowType) { + final List exprList = new ArrayList<>(); + for (RelDataTypeField field : inputRowType.getFieldList()) { + final RexBuilder rexBuilder = cluster.getRexBuilder(); + exprList.add(rexBuilder.makeInputRef(field.getType(), field.getIndex())); + } + return exprList; + } + + private static int registerExpression(List exprList, RexNode node) { + int i = exprList.indexOf(node); + if (i < 0) { + i = exprList.size(); + exprList.add(node); + } + return i; + } + + private static List registerExpressions(List extraNodes, + Iterable nodes) { + final List builder = new ArrayList<>(); + for (RexNode node : nodes) { + builder.add(registerExpression(extraNodes, node)); + } + return builder; + } + + private HiveReplicatedRelBuilder setOp(boolean all, SqlKind kind, int n) { + List inputs = new LinkedList<>(); + for (int i = 0; i < n; i++) { + inputs.add(0, build()); + } + switch (kind) { + case UNION: + case INTERSECT: + case EXCEPT: + if (n < 1) { + throw new IllegalArgumentException( + "bad INTERSECT/UNION/EXCEPT input count"); + } + break; + default: + throw new AssertionError("bad setOp " + kind); + } + switch (n) { + case 1: + return push(inputs.get(0)); + default: + return push(setOpFactory.createSetOp(kind, inputs, all)); + } + } + + /** Creates a {@link org.apache.calcite.rel.core.Union} of the two 
most recent + * relational expressions on the stack. + * + * @param all Whether to create UNION ALL + */ + public HiveReplicatedRelBuilder union(boolean all) { + return union(all, 2); + } + + /** Creates a {@link org.apache.calcite.rel.core.Union} of the {@code n} + * most recent relational expressions on the stack. + * + * @param all Whether to create UNION ALL + * @param n Number of inputs to the UNION operator + */ + public HiveReplicatedRelBuilder union(boolean all, int n) { + return setOp(all, SqlKind.UNION, n); + } + + /** Creates an {@link org.apache.calcite.rel.core.Intersect} of the two most + * recent relational expressions on the stack. + * + * @param all Whether to create INTERSECT ALL + */ + public HiveReplicatedRelBuilder intersect(boolean all) { + return intersect(all, 2); + } + + /** Creates an {@link org.apache.calcite.rel.core.Intersect} of the {@code n} + * most recent relational expressions on the stack. + * + * @param all Whether to create INTERSECT ALL + * @param n Number of inputs to the INTERSECT operator + */ + public HiveReplicatedRelBuilder intersect(boolean all, int n) { + return setOp(all, SqlKind.INTERSECT, n); + } + + /** Creates a {@link org.apache.calcite.rel.core.Minus} of the two most recent + * relational expressions on the stack. + * + * @param all Whether to create EXCEPT ALL + */ + public HiveReplicatedRelBuilder minus(boolean all) { + return minus(all, 2); + } + + /** Creates a {@link org.apache.calcite.rel.core.Minus} of the {@code n} + * most recent relational expressions on the stack. + * + * @param all Whether to create EXCEPT ALL + */ + public HiveReplicatedRelBuilder minus(boolean all, int n) { + return setOp(all, SqlKind.EXCEPT, n); + } + + /** Creates a {@link org.apache.calcite.rel.core.Join}. */ + public HiveReplicatedRelBuilder join(JoinRelType joinType, RexNode condition0, + RexNode... 
conditions) { + return join(joinType, Lists.asList(condition0, conditions)); + } + + /** Creates a {@link org.apache.calcite.rel.core.Join} with multiple + * conditions. */ + public HiveReplicatedRelBuilder join(JoinRelType joinType, + Iterable conditions) { + return join(joinType, and(conditions), + ImmutableSet.of()); + } + + public HiveReplicatedRelBuilder join(JoinRelType joinType, RexNode condition) { + return join(joinType, condition, ImmutableSet.of()); + } + + /** Creates a correlation variable for the current input, and writes it into + * a Holder. */ + public HiveReplicatedRelBuilder variable(Holder v) { + v.set((RexCorrelVariable) + getRexBuilder().makeCorrel(peek().getRowType(), + cluster.createCorrel())); + return this; + } + + /** Returns a reference to a given field of a record-valued expression. */ + public RexNode field(RexNode e, String name) { + return getRexBuilder().makeFieldAccess(e, name, false); + } + + /** Creates a {@link org.apache.calcite.rel.core.Join} with correlating + * variables. */ + public HiveReplicatedRelBuilder join(JoinRelType joinType, RexNode condition, + Set variablesSet) { + Frame right = stack.pop(); + final Frame left = stack.pop(); + final RelNode join; + final boolean correlate = variablesSet.size() == 1; + RexNode postCondition = literal(true); + if (correlate) { + final CorrelationId id = Iterables.getOnlyElement(variablesSet); + final ImmutableBitSet requiredColumns = + RelOptUtil.correlationColumns(id, right.rel); + if (!RelOptUtil.notContainsCorrelation(left.rel, id, Litmus.IGNORE)) { + throw new IllegalArgumentException("variable " + id + + " must not be used by left input to correlation"); + } + switch (joinType) { + case LEFT: + // Correlate does not have an ON clause. + // For a LEFT correlate, predicate must be evaluated first. + // For INNER, we can defer. 
+ stack.push(right); + filter(condition.accept(new Shifter(left.rel, id, right.rel))); + right = stack.pop(); + break; + default: + postCondition = condition; + } + join = correlateFactory.createCorrelate(left.rel, right.rel, id, + requiredColumns, SemiJoinType.of(joinType)); + } else { + join = joinFactory.createJoin(left.rel, right.rel, condition, + variablesSet, joinType, false); + } + final List> pairs = new ArrayList<>(); + pairs.addAll(left.right); + pairs.addAll(right.right); + stack.push(new Frame(join, ImmutableList.copyOf(pairs))); + filter(postCondition); + return this; + } + + /** Creates a {@link org.apache.calcite.rel.core.Join} using USING syntax. + * + *

For each of the field names, both left and right inputs must have a + * field of that name. Constructs a join condition that the left and right + * fields are equal. + * + * @param joinType Join type + * @param fieldNames Field names + */ + public HiveReplicatedRelBuilder join(JoinRelType joinType, String... fieldNames) { + final List conditions = new ArrayList<>(); + for (String fieldName : fieldNames) { + conditions.add( + call(SqlStdOperatorTable.EQUALS, + field(2, 0, fieldName), + field(2, 1, fieldName))); + } + return join(joinType, conditions); + } + + /** Creates a {@link org.apache.calcite.rel.core.SemiJoin}. */ + public HiveReplicatedRelBuilder semiJoin(Iterable conditions) { + final Frame right = stack.pop(); + final Frame left = stack.pop(); + final RelNode semiJoin = + semiJoinFactory.createSemiJoin(left.rel, right.rel, and(conditions)); + stack.push(new Frame(semiJoin, left.right)); + return this; + } + + /** Creates a {@link org.apache.calcite.rel.core.SemiJoin}. */ + public HiveReplicatedRelBuilder semiJoin(RexNode... conditions) { + return semiJoin(ImmutableList.copyOf(conditions)); + } + + /** Assigns a table alias to the top entry on the stack. */ + public HiveReplicatedRelBuilder as(String alias) { + final Frame pair = stack.pop(); + stack.push( + new Frame(pair.rel, + ImmutableList.of(Pair.of(alias, pair.right.get(0).right)))); + return this; + } + + /** Creates a {@link Values}. + * + *

The {@code values} array must have the same number of entries as + * {@code fieldNames}, or an integer multiple if you wish to create multiple + * rows. + * + *

If there are zero rows, or if all values of a any column are + * null, this method cannot deduce the type of columns. For these cases, + * call {@link #values(Iterable, RelDataType)}. + * + * @param fieldNames Field names + * @param values Values + */ + public HiveReplicatedRelBuilder values(String[] fieldNames, Object... values) { + if (fieldNames == null + || fieldNames.length == 0 + || values.length % fieldNames.length != 0 + || values.length < fieldNames.length) { + throw new IllegalArgumentException( + "Value count must be a positive multiple of field count"); + } + final int rowCount = values.length / fieldNames.length; + for (Ord fieldName : Ord.zip(fieldNames)) { + if (allNull(values, fieldName.i, fieldNames.length)) { + throw new IllegalArgumentException("All values of field '" + fieldName.e + + "' are null; cannot deduce type"); + } + } + final ImmutableList> tupleList = + tupleList(fieldNames.length, values); + final RelDataTypeFactory.FieldInfoBuilder rowTypeBuilder = + cluster.getTypeFactory().builder(); + for (final Ord fieldName : Ord.zip(fieldNames)) { + final String name = + fieldName.e != null ? 
fieldName.e : "expr$" + fieldName.i; + final RelDataType type = cluster.getTypeFactory().leastRestrictive( + new AbstractList() { + public RelDataType get(int index) { + return tupleList.get(index).get(fieldName.i).getType(); + } + + public int size() { + return rowCount; + } + }); + rowTypeBuilder.add(name, type); + } + final RelDataType rowType = rowTypeBuilder.build(); + return values(tupleList, rowType); + } + + private ImmutableList> tupleList(int columnCount, + Object[] values) { + final ImmutableList.Builder> listBuilder = + ImmutableList.builder(); + final List valueList = new ArrayList<>(); + for (int i = 0; i < values.length; i++) { + Object value = values[i]; + valueList.add((RexLiteral) literal(value)); + if ((i + 1) % columnCount == 0) { + listBuilder.add(ImmutableList.copyOf(valueList)); + valueList.clear(); + } + } + return listBuilder.build(); + } + + /** Returns whether all values for a given column are null. */ + private boolean allNull(Object[] values, int column, int columnCount) { + for (int i = column; i < values.length; i += columnCount) { + if (values[i] != null) { + return false; + } + } + return true; + } + + /** + * Empty relationship can be expressed in many different ways, e.g., + * filter(cond=false), empty LogicalValues(), etc. Calcite default implementation + * uses empty LogicalValues(); however, currently there is not an equivalent to + * this expression in Hive. Thus, we use limit 0, since Hive already includes + * optimizations that will do early pruning of the result tree when it is found, + * e.g., GlobalLimitOptimizer. + */ + public HiveReplicatedRelBuilder empty() { + final RelNode input = build(); + final RelNode sort = HiveRelFactories.HIVE_SORT_FACTORY.createSort( + input, RelCollations.of(), null, literal(0)); + return this.push(sort); + } + + + /** Creates a {@link Values} with a specified row type. + * + *

This method can handle cases that {@link #values(String[], Object...)} + * cannot, such as all values of a column being null, or there being zero + * rows. + * + * @param rowType Row type + * @param columnValues Values + */ + public HiveReplicatedRelBuilder values(RelDataType rowType, Object... columnValues) { + final ImmutableList> tupleList = + tupleList(rowType.getFieldCount(), columnValues); + RelNode values = valuesFactory.createValues(cluster, rowType, + ImmutableList.copyOf(tupleList)); + push(values); + return this; + } + + /** Creates a {@link Values} with a specified row type. + * + *

This method can handle cases that {@link #values(String[], Object...)} + * cannot, such as all values of a column being null, or there being zero + * rows. + * + * @param tupleList Tuple list + * @param rowType Row type + */ + public HiveReplicatedRelBuilder values(Iterable> tupleList, + RelDataType rowType) { + RelNode values = + valuesFactory.createValues(cluster, rowType, copy(tupleList)); + push(values); + return this; + } + + /** Creates a {@link Values} with a specified row type and + * zero rows. + * + * @param rowType Row type + */ + public HiveReplicatedRelBuilder values(RelDataType rowType) { + return values(ImmutableList.>of(), rowType); + } + + /** Converts an iterable of lists into an immutable list of immutable lists + * with the same contents. Returns the same object if possible. */ + private static ImmutableList> + copy(Iterable> tupleList) { + final ImmutableList.Builder> builder = + ImmutableList.builder(); + int changeCount = 0; + for (List literals : tupleList) { + final ImmutableList literals2 = + ImmutableList.copyOf(literals); + builder.add(literals2); + if (literals != literals2) { + ++changeCount; + } + } + if (changeCount == 0) { + // don't make a copy if we don't have to + //noinspection unchecked + return (ImmutableList>) tupleList; + } + return builder.build(); + } + + /** Creates a limit without a sort. */ + public HiveReplicatedRelBuilder limit(int offset, int fetch) { + return sortLimit(offset, fetch, ImmutableList.of()); + } + + /** Creates a {@link Sort} by field ordinals. + * + *

Negative fields mean descending: -1 means field(0) descending, + * -2 means field(1) descending, etc. + */ + public HiveReplicatedRelBuilder sort(int... fields) { + final ImmutableList.Builder builder = ImmutableList.builder(); + for (int field : fields) { + builder.add(field < 0 ? desc(field(-field - 1)) : field(field)); + } + return sortLimit(-1, -1, builder.build()); + } + + /** Creates a {@link Sort} by expressions. */ + public HiveReplicatedRelBuilder sort(RexNode... nodes) { + return sortLimit(-1, -1, ImmutableList.copyOf(nodes)); + } + + /** Creates a {@link Sort} by expressions. */ + public HiveReplicatedRelBuilder sort(Iterable nodes) { + return sortLimit(-1, -1, nodes); + } + + /** Creates a {@link Sort} by expressions, with limit and offset. */ + public HiveReplicatedRelBuilder sortLimit(int offset, int fetch, RexNode... nodes) { + return sortLimit(offset, fetch, ImmutableList.copyOf(nodes)); + } + + /** Creates a {@link Sort} by a list of expressions, with limit and offset. + * + * @param offset Number of rows to skip; non-positive means don't skip any + * @param fetch Maximum number of rows to fetch; negative means no limit + * @param nodes Sort expressions + */ + public HiveReplicatedRelBuilder sortLimit(int offset, int fetch, + Iterable nodes) { + final List fieldCollations = new ArrayList<>(); + final RelDataType inputRowType = peek().getRowType(); + final List extraNodes = projects(inputRowType); + final List originalExtraNodes = ImmutableList.copyOf(extraNodes); + for (RexNode node : nodes) { + fieldCollations.add( + collation(node, RelFieldCollation.Direction.ASCENDING, null, + extraNodes)); + } + final RexNode offsetNode = offset <= 0 ? null : literal(offset); + final RexNode fetchNode = fetch < 0 ? 
null : literal(fetch); + if (offsetNode == null && fetch == 0) { + return empty(); + } + if (offsetNode == null && fetchNode == null && fieldCollations.isEmpty()) { + return this; // sort is trivial + } + + final boolean addedFields = extraNodes.size() > originalExtraNodes.size(); + if (fieldCollations.isEmpty()) { + assert !addedFields; + RelNode top = peek(); + if (top instanceof Sort) { + final Sort sort2 = (Sort) top; + if (sort2.offset == null && sort2.fetch == null) { + stack.pop(); + push(sort2.getInput()); + final RelNode sort = + sortFactory.createSort(build(), sort2.collation, + offsetNode, fetchNode); + push(sort); + return this; + } + } + if (top instanceof Project) { + final Project project = (Project) top; + if (project.getInput() instanceof Sort) { + final Sort sort2 = (Sort) project.getInput(); + if (sort2.offset == null && sort2.fetch == null) { + stack.pop(); + push(sort2.getInput()); + final RelNode sort = + sortFactory.createSort(build(), sort2.collation, + offsetNode, fetchNode); + push(sort); + project(project.getProjects()); + return this; + } + } + } + } + if (addedFields) { + project(extraNodes); + } + final RelNode sort = + sortFactory.createSort(build(), RelCollations.of(fieldCollations), + offsetNode, fetchNode); + push(sort); + if (addedFields) { + project(originalExtraNodes); + } + return this; + } + + private static RelFieldCollation collation(RexNode node, + RelFieldCollation.Direction direction, + RelFieldCollation.NullDirection nullDirection, List extraNodes) { + switch (node.getKind()) { + case INPUT_REF: + return new RelFieldCollation(((RexInputRef) node).getIndex(), direction, + Util.first(nullDirection, direction.defaultNullDirection())); + case DESCENDING: + return collation(((RexCall) node).getOperands().get(0), + RelFieldCollation.Direction.DESCENDING, + nullDirection, extraNodes); + case NULLS_FIRST: + return collation(((RexCall) node).getOperands().get(0), direction, + RelFieldCollation.NullDirection.FIRST, extraNodes); + 
case NULLS_LAST: + return collation(((RexCall) node).getOperands().get(0), direction, + RelFieldCollation.NullDirection.LAST, extraNodes); + default: + final int fieldIndex = extraNodes.size(); + extraNodes.add(node); + return new RelFieldCollation(fieldIndex, direction, + Util.first(nullDirection, direction.defaultNullDirection())); + } + } + + /** + * Creates a projection that converts the current relational expression's + * output to a desired row type. + * + * @param castRowType row type after cast + * @param rename if true, use field names from castRowType; if false, + * preserve field names from rel + */ + public HiveReplicatedRelBuilder convert(RelDataType castRowType, boolean rename) { + final RelNode r = build(); + final RelNode r2 = + RelOptUtil.createCastRel(r, castRowType, rename, projectFactory); + push(r2); + return this; + } + + public HiveReplicatedRelBuilder permute(Mapping mapping) { + assert mapping.getMappingType().isSingleSource(); + assert mapping.getMappingType().isMandatorySource(); + if (mapping.isIdentity()) { + return this; + } + final List exprList = Lists.newArrayList(); + for (int i = 0; i < mapping.getTargetCount(); i++) { + exprList.add(field(mapping.getSource(i))); + } + return project(exprList); + } + + public HiveReplicatedRelBuilder aggregate(GroupKey groupKey, + List aggregateCalls) { + return aggregate(groupKey, + Lists.transform( + aggregateCalls, new Function() { + public AggCall apply(AggregateCall input) { + return new AggCallImpl2(input); + } + })); + } + + /** Clears the stack. + * + *

The builder's state is now the same as when it was created. */ + public void clear() { + stack.clear(); + } + + protected String getAlias() { + final Frame frame = stack.peek(); + return frame.right.size() == 1 + ? frame.right.get(0).left + : null; + } + + /** Information necessary to create a call to an aggregate function. + * + * @see RelBuilder#aggregateCall */ + public interface AggCall { + } + + /** Information necessary to create the GROUP BY clause of an Aggregate. + * + * @see RelBuilder#groupKey */ + public interface GroupKey { + /** Assigns an alias to this group key. + * + *

Used to assign field names in the {@code group} operation. */ + GroupKey alias(String alias); + } + + /** Implementation of {@link RelBuilder.GroupKey}. */ + protected static class GroupKeyImpl implements GroupKey { + final ImmutableList nodes; + final boolean indicator; + final ImmutableList> nodeLists; + final String alias; + + GroupKeyImpl(ImmutableList nodes, boolean indicator, + ImmutableList> nodeLists, String alias) { + this.nodes = Preconditions.checkNotNull(nodes); + this.indicator = indicator; + this.nodeLists = nodeLists; + this.alias = alias; + } + + @Override public String toString() { + return alias == null ? nodes.toString() : nodes + " as " + alias; + } + + public GroupKey alias(String alias) { + return Objects.equals(this.alias, alias) + ? this + : new GroupKeyImpl(nodes, indicator, nodeLists, alias); + } + } + + /** Implementation of {@link RelBuilder.AggCall}. */ + private static class AggCallImpl implements AggCall { + private final SqlAggFunction aggFunction; + private final boolean distinct; + private final RexNode filter; + private final String alias; + private final ImmutableList operands; + + AggCallImpl(SqlAggFunction aggFunction, boolean distinct, RexNode filter, + String alias, ImmutableList operands) { + this.aggFunction = aggFunction; + this.distinct = distinct; + this.filter = filter; + this.alias = alias; + this.operands = operands; + } + } + + /** Implementation of {@link RelBuilder.AggCall} that wraps an + * {@link AggregateCall}. */ + private static class AggCallImpl2 implements AggCall { + private final AggregateCall aggregateCall; + + AggCallImpl2(AggregateCall aggregateCall) { + this.aggregateCall = Preconditions.checkNotNull(aggregateCall); + } + } + + /** Builder stack frame. + * + *

Describes a previously created relational expression and + * information about how table aliases map into its row type. */ + private static class Frame { + static final Function, List> FN = + new Function, List>() { + public List apply(Pair input) { + return input.right.getFieldList(); + } + }; + + final RelNode rel; + final ImmutableList> right; + + private Frame(RelNode rel, ImmutableList> pairs) { + this.rel = rel; + this.right = pairs; + } + + private Frame(RelNode rel) { + this(rel, ImmutableList.of(Pair.of(deriveAlias(rel), rel.getRowType()))); + } + + private static String deriveAlias(RelNode rel) { + if (rel instanceof TableScan) { + final List names = rel.getTable().getQualifiedName(); + if (!names.isEmpty()) { + return Util.last(names); + } + } + return null; + } + + List fields() { + return CompositeList.ofCopy(Iterables.transform(right, FN)); + } + } + + /** Shuttle that shifts a predicate's inputs to the left, replacing early + * ones with references to a + * {@link org.apache.calcite.rex.RexCorrelVariable}. 
*/ + private class Shifter extends RexShuttle { + private final RelNode left; + private final CorrelationId id; + private final RelNode right; + + Shifter(RelNode left, CorrelationId id, RelNode right) { + this.left = left; + this.id = id; + this.right = right; + } + + public RexNode visitInputRef(RexInputRef inputRef) { + final RelDataType leftRowType = left.getRowType(); + final RexBuilder rexBuilder = getRexBuilder(); + final int leftCount = leftRowType.getFieldCount(); + if (inputRef.getIndex() < leftCount) { + final RexNode v = rexBuilder.makeCorrel(leftRowType, id); + return rexBuilder.makeFieldAccess(v, inputRef.getIndex()); + } else { + return rexBuilder.makeInputRef(right, inputRef.getIndex() - leftCount); + } + } + } +} + +// End RelBuilder.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java index 0410c91..cae25fc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java @@ -22,10 +22,14 @@ import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.*; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.calcite.rel.core.CorrelationId; +import java.util.*; public class HiveFilter extends Filter implements HiveRelNode { @@ -48,4 +52,72 @@ public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { return mq.getNonCumulativeCost(this); } + private void findCorrelatedVar(RexNode node, Set allVars) + { 
+ if(node instanceof RexCall) + { + RexCall nd = (RexCall)node; + for (RexNode rn : nd.getOperands()) { + if (rn instanceof RexFieldAccess) + { + final RexNode ref = ((RexFieldAccess) rn).getReferenceExpr(); + assert(ref instanceof RexCorrelVariable); + allVars.add(((RexCorrelVariable) ref).id); + } + else { + findCorrelatedVar(rn, allVars); + } + } + } + } + + //traverse the given node to find all correlated variables + // Note that correlated variables are supported in Filter only i.e. Where & Having + private void traverseFilter(RexNode node, Set allVars) + { + if(node instanceof RexSubQuery) + { + //we expect correlated variables in HiveFilter only for now. Also check for case where operator has 0 inputs e.g. TableScan + RelNode input = ((RexSubQuery)node).rel.getInput(0); + while( input != null && !(input instanceof HiveFilter) && input.getInputs().size() >=1) + { + //we don't expect corr vars within JOIN or UNION for now + // we only expect cor vars in top level filter + if( input.getInputs().size() > 1) + return; + input = input.getInput(0); + } + if(input != null && input instanceof HiveFilter ) + { + findCorrelatedVar(((HiveFilter)input).getCondition(), allVars); + } + return; + } + //AND, NOT etc + if(node instanceof RexCall) + { + int numOperands = ((RexCall)node).getOperands().size(); + for(int i=0; i getVariablesSet() { + Set allCorrVars = new HashSet<>(); + traverseFilter(condition, allCorrVars); + return allCorrVars; + } + + public RelNode accept(RelShuttle shuttle) { + if (shuttle instanceof HiveRelShuttle) + { + return ((HiveRelShuttle)shuttle).visit(this); + } + return shuttle.visit(this); + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java index ba9483e..b175189 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java +++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java @@ -35,6 +35,7 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.ImmutableBitSet; @@ -43,6 +44,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel.JoinAlgorithm; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel.DefaultJoinAlgorithm; @@ -230,4 +232,13 @@ public RelWriter explainTerms(RelWriter pw) { .item("cost", joinCost == null ? 
"not available" : joinCost); } + + //required for HiveRelDecorrelator + public RelNode accept(RelShuttle shuttle) { + if (shuttle instanceof HiveRelShuttle) + { + return ((HiveRelShuttle)shuttle).visit(this); + } + return shuttle.visit(this); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java index 3e0a9a6..7935e96 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java @@ -27,6 +27,7 @@ import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; @@ -40,6 +41,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import com.google.common.collect.ImmutableList; @@ -196,4 +198,13 @@ public boolean isSynthetic() { return isSysnthetic; } + //required for HiveRelDecorrelator + @Override public RelNode accept(RelShuttle shuttle) { + if(shuttle instanceof HiveRelShuttle) + { + return ((HiveRelShuttle)shuttle).visit(this); + } + return shuttle.visit(this); + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java index 6ed2914..e795303 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java @@ -24,8 +24,10 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import com.google.common.collect.ImmutableMap; @@ -107,4 +109,13 @@ public void setRuleCreated(boolean ruleCreated) { this.ruleCreated = ruleCreated; } + //required for HiveRelDecorrelator + public RelNode accept(RelShuttle shuttle) { + if (shuttle instanceof HiveRelShuttle) + { + return ((HiveRelShuttle)shuttle).visit(this); + } + return shuttle.visit(this); + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java new file mode 100644 index 0000000..ed56477 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -0,0 +1,2996 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.linq4j.function.Function2; +import org.apache.calcite.plan.Context; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCostImpl; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.BiRel; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.*; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalCorrelate; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rel.logical.LogicalJoin; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rel.metadata.RelMdUtil; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.rules.FilterCorrelateRule; +import org.apache.calcite.rel.rules.FilterJoinRule; +import org.apache.calcite.rel.rules.FilterProjectTransposeRule; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import 
org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlCountAggFunction; +import org.apache.calcite.sql.fun.SqlSingleValueAggFunction; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.Bug; +import org.apache.calcite.util.Holder; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Litmus; +import org.apache.calcite.util.Pair; +import org.apache.calcite.util.ReflectUtil; +import org.apache.calcite.util.ReflectiveVisitor; +import org.apache.calcite.util.Stacks; +import org.apache.calcite.util.Util; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.*; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.ImmutableSortedMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import com.google.common.collect.Multimaps; +import com.google.common.collect.Sets; +import com.google.common.collect.SortedSetMultimap; +import org.apache.hadoop.hive.ql.parse.SemanticException; 
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttleImpl; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.logging.Level; + +/** + * NOTE: this whole logic is replicated from Calcite's RelDecorrelator + * and is extended to make it suitable for HIVE + * TODO: + * We should get rid of this and replace it with Calcite's RelDecorrelator + * once that works with Join, Project etc instead of LogicalJoin, LogicalProject. + * Also we need to have CALCITE-1511 fixed + * + * RelDecorrelator replaces all correlated expressions (corExp) in a relational + * expression (RelNode) tree with non-correlated expressions that are produced + * from joining the RelNode that produces the corExp with the RelNode that + * references it. + * + *

TODO:

+ *
    + *
  • replace {@code CorelMap} constructor parameter with a RelNode + *
  • make {@link #currentRel} immutable (would require a fresh + * RelDecorrelator for each node being decorrelated)
  • + *
  • make fields of {@code CorelMap} immutable
  • + *
  • make sub-class rules static, and have them create their own + * de-correlator
  • + *
+ */ +public class HiveRelDecorrelator implements ReflectiveVisitor { + //~ Static fields/initializers --------------------------------------------- + + protected static final Logger LOG = LoggerFactory.getLogger( + HiveRelDecorrelator.class); + + //~ Instance fields -------------------------------------------------------- + + private final RelBuilder relBuilder; + + // map built during translation + private CorelMap cm; + + private final ReflectUtil.MethodDispatcher dispatcher = + ReflectUtil.createMethodDispatcher(Frame.class, this, "decorrelateRel", + RelNode.class); + + private final RexBuilder rexBuilder; + + // The rel which is being visited + private RelNode currentRel; + + private final Context context; + + /** Built during decorrelation, of rel to all the newly created correlated + * variables in its output, and to map old input positions to new input + * positions. This is from the view point of the parent rel of a new rel. */ + private final Map map = new HashMap<>(); + + private final HashSet generatedCorRels = Sets.newHashSet(); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelDecorrelator ( + RelOptCluster cluster, + CorelMap cm, + Context context) { + this.cm = cm; + this.rexBuilder = cluster.getRexBuilder(); + this.context = context; + relBuilder = RelFactories.LOGICAL_BUILDER.create(cluster, null); + + } + + //~ Methods ---------------------------------------------------------------- + + /** Decorrelates a query. + * + *

This is the main entry point to {@code RelDecorrelator}. + * + * @param rootRel Root node of the query + * + * @return Equivalent query with all + * {@link org.apache.calcite.rel.logical.LogicalCorrelate} instances removed + */ + public static RelNode decorrelateQuery(RelNode rootRel) { + final CorelMap corelMap = new CorelMapBuilder().build(rootRel); + if (!corelMap.hasCorrelation()) { + return rootRel; + } + + final RelOptCluster cluster = rootRel.getCluster(); + final HiveRelDecorrelator decorrelator = + new HiveRelDecorrelator(cluster, corelMap, + cluster.getPlanner().getContext()); + + RelNode newRootRel = decorrelator.removeCorrelationViaRule(rootRel); + + if (!decorrelator.cm.mapCorVarToCorRel.isEmpty()) { + newRootRel = decorrelator.decorrelate(newRootRel); + } + + return newRootRel; + } + + private void setCurrent(RelNode root, LogicalCorrelate corRel) { + currentRel = corRel; + if (corRel != null) { + cm = new CorelMapBuilder().build(Util.first(root, corRel)); + } + } + + private RelNode decorrelate(RelNode root) { + // first adjust count() expression if any + HepProgram program = HepProgram.builder() + .addRuleInstance(new AdjustProjectForCountAggregateRule(false)) + .addRuleInstance(new AdjustProjectForCountAggregateRule(true)) + .addRuleInstance(FilterJoinRule.FILTER_ON_JOIN) + .addRuleInstance(FilterProjectTransposeRule.INSTANCE) + .addRuleInstance(FilterCorrelateRule.INSTANCE) + .build(); + + HepPlanner planner = createPlanner(program); + + planner.setRoot(root); + root = planner.findBestExp(); + + // Perform decorrelation. 
+ map.clear(); + + final Frame frame = getInvoke(root, null); + if (frame != null) { + // has been rewritten; apply rules post-decorrelation + final HepProgram program2 = HepProgram.builder() + .addRuleInstance(FilterJoinRule.FILTER_ON_JOIN) + .addRuleInstance(FilterJoinRule.JOIN) + .build(); + + final HepPlanner planner2 = createPlanner(program2); + final RelNode newRoot = frame.r; + planner2.setRoot(newRoot); + return planner2.findBestExp(); + } + + return root; + } + + private Function2 createCopyHook() { + return new Function2() { + public Void apply(RelNode oldNode, RelNode newNode) { + if (cm.mapRefRelToCorVar.containsKey(oldNode)) { + cm.mapRefRelToCorVar.putAll(newNode, + cm.mapRefRelToCorVar.get(oldNode)); + } + if (oldNode instanceof LogicalCorrelate + && newNode instanceof LogicalCorrelate) { + LogicalCorrelate oldCor = (LogicalCorrelate) oldNode; + CorrelationId c = oldCor.getCorrelationId(); + if (cm.mapCorVarToCorRel.get(c) == oldNode) { + cm.mapCorVarToCorRel.put(c, newNode); + } + + if (generatedCorRels.contains(oldNode)) { + generatedCorRels.add((LogicalCorrelate) newNode); + } + } + return null; + } + }; + } + + private HepPlanner createPlanner(HepProgram program) { + // Create a planner with a hook to update the mapping tables when a + // node is copied when it is registered. 
+ return new HepPlanner( + program, + context, + true, + createCopyHook(), + RelOptCostImpl.FACTORY); + } + + public RelNode removeCorrelationViaRule(RelNode root) { + HepProgram program = HepProgram.builder() + .addRuleInstance(new RemoveSingleAggregateRule()) + .addRuleInstance(new RemoveCorrelationForScalarProjectRule()) + .addRuleInstance(new RemoveCorrelationForScalarAggregateRule()) + .build(); + + HepPlanner planner = createPlanner(program); + + planner.setRoot(root); + return planner.findBestExp(); + } + + protected RexNode decorrelateExpr(RexNode exp) { + DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle(); + return exp.accept(shuttle); + } + + protected RexNode removeCorrelationExpr( + RexNode exp, + boolean projectPulledAboveLeftCorrelator) { + RemoveCorrelationRexShuttle shuttle = + new RemoveCorrelationRexShuttle(rexBuilder, + projectPulledAboveLeftCorrelator, null, ImmutableSet.of()); + return exp.accept(shuttle); + } + + protected RexNode removeCorrelationExpr( + RexNode exp, + boolean projectPulledAboveLeftCorrelator, + RexInputRef nullIndicator) { + RemoveCorrelationRexShuttle shuttle = + new RemoveCorrelationRexShuttle(rexBuilder, + projectPulledAboveLeftCorrelator, nullIndicator, + ImmutableSet.of()); + return exp.accept(shuttle); + } + + protected RexNode removeCorrelationExpr( + RexNode exp, + boolean projectPulledAboveLeftCorrelator, + Set isCount) { + RemoveCorrelationRexShuttle shuttle = + new RemoveCorrelationRexShuttle(rexBuilder, + projectPulledAboveLeftCorrelator, null, isCount); + return exp.accept(shuttle); + } + + /** Fallback if none of the other {@code decorrelateRel} methods match. 
*/ + public Frame decorrelateRel(RelNode rel) { + RelNode newRel = rel.copy(rel.getTraitSet(), rel.getInputs()); + + if (rel.getInputs().size() > 0) { + List oldInputs = rel.getInputs(); + List newInputs = Lists.newArrayList(); + for (int i = 0; i < oldInputs.size(); ++i) { + final Frame frame = getInvoke(oldInputs.get(i), rel); + if (frame == null || !frame.corVarOutputPos.isEmpty()) { + // if input is not rewritten, or if it produces correlated + // variables, terminate rewrite + return null; + } + newInputs.add(frame.r); + newRel.replaceInput(i, frame.r); + } + + if (!Util.equalShallow(oldInputs, newInputs)) { + newRel = rel.copy(rel.getTraitSet(), newInputs); + } + } + + // the output position should not change since there are no corVars + // coming from below. + return register(rel, newRel, identityMap(rel.getRowType().getFieldCount()), + ImmutableSortedMap.of()); + } + + /** + * Rewrite Sort. + * + * @param rel Sort to be rewritten + */ + public Frame decorrelateRel(HiveSortLimit rel) { + // + // Rewrite logic: + // + // 1. change the collations field to reference the new input. + // + + // Sort itself should not reference cor vars. + assert !cm.mapRefRelToCorVar.containsKey(rel); + + // Sort only references field positions in collations field. + // The collations field in the newRel now need to refer to the + // new output positions in its input. + // Its output does not change the input ordering, so there's no + // need to call propagateExpr. + + final RelNode oldInput = rel.getInput(); + final Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. 
+ return null; + } + final RelNode newInput = frame.r; + + Mappings.TargetMapping mapping = + Mappings.target( + frame.oldToNewOutputPos, + oldInput.getRowType().getFieldCount(), + newInput.getRowType().getFieldCount()); + + RelCollation oldCollation = rel.getCollation(); + RelCollation newCollation = RexUtil.apply(mapping, oldCollation); + + final RelNode newSort = HiveSortLimit.create(newInput, newCollation, rel.offset, rel.fetch); + + // Sort does not change input ordering + return register(rel, newSort, frame.oldToNewOutputPos, + frame.corVarOutputPos); + } + /** + * Rewrite Sort. + * + * @param rel Sort to be rewritten + */ + public Frame decorrelateRel(Sort rel) { + // + // Rewrite logic: + // + // 1. change the collations field to reference the new input. + // + + // Sort itself should not reference cor vars. + assert !cm.mapRefRelToCorVar.containsKey(rel); + + // Sort only references field positions in collations field. + // The collations field in the newRel now need to refer to the + // new output positions in its input. + // Its output does not change the input ordering, so there's no + // need to call propagateExpr. + + final RelNode oldInput = rel.getInput(); + final Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + final RelNode newInput = frame.r; + + Mappings.TargetMapping mapping = + Mappings.target( + frame.oldToNewOutputPos, + oldInput.getRowType().getFieldCount(), + newInput.getRowType().getFieldCount()); + + RelCollation oldCollation = rel.getCollation(); + RelCollation newCollation = RexUtil.apply(mapping, oldCollation); + + final RelNode newSort = HiveSortLimit.create(newInput, newCollation, rel.offset, rel.fetch); + + // Sort does not change input ordering + return register(rel, newSort, frame.oldToNewOutputPos, + frame.corVarOutputPos); + } + + /** + * Rewrites a {@link Values}. 
+ * + * @param rel Values to be rewritten + */ + public Frame decorrelateRel(Values rel) { + // There are no inputs, so rel does not need to be changed. + return null; + } + + /** + * Rewrites a {@link LogicalAggregate}. + * + * @param rel Aggregate to rewrite + */ + public Frame decorrelateRel(LogicalAggregate rel) throws SemanticException{ + if (rel.getGroupType() != Aggregate.Group.SIMPLE) { + throw new AssertionError(Bug.CALCITE_461_FIXED); + } + // + // Rewrite logic: + // + // 1. Permute the group by keys to the front. + // 2. If the input of an aggregate produces correlated variables, + // add them to the group list. + // 3. Change aggCalls to reference the new project. + // + + // Aggregate itself should not reference cor vars. + assert !cm.mapRefRelToCorVar.containsKey(rel); + + final RelNode oldInput = rel.getInput(); + final Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + + //I think this is a bug in Calcite where Aggregate seems to always expect + // correlated variable in nodes underneath it which is not true for queries such as + // select p.empno, li.mgr from (select distinct empno as empno from emp) p join emp li on p.empno= li.empno where li.sal = 1 + // and li.deptno in (select deptno from emp where JOB = 'AIR' AND li.mgr=mgr) + + //assert !frame.corVarOutputPos.isEmpty(); + final RelNode newInput = frame.r; + + // map from newInput + Map mapNewInputToProjOutputPos = Maps.newHashMap(); + final int oldGroupKeyCount = rel.getGroupSet().cardinality(); + + // Project projects the original expressions, + // plus any correlated variables the input wants to pass along. + final List> projects = Lists.newArrayList(); + + List newInputOutput = + newInput.getRowType().getFieldList(); + + int newPos = 0; + + // oldInput has the original group by keys in the front. 
+ final NavigableMap omittedConstants = new TreeMap<>(); + for (int i = 0; i < oldGroupKeyCount; i++) { + final RexLiteral constant = projectedLiteral(newInput, i); + if (constant != null) { + // Exclude constants. Aggregate({true}) occurs because Aggregate({}) + // would generate 1 row even when applied to an empty table. + omittedConstants.put(i, constant); + continue; + } + int newInputPos = frame.oldToNewOutputPos.get(i); + projects.add(RexInputRef.of2(newInputPos, newInputOutput)); + mapNewInputToProjOutputPos.put(newInputPos, newPos); + newPos++; + } + + final SortedMap mapCorVarToOutputPos = new TreeMap<>(); + if (!frame.corVarOutputPos.isEmpty()) { + // If input produces correlated variables, move them to the front, + // right after any existing GROUP BY fields. + + // Now add the corVars from the input, starting from + // position oldGroupKeyCount. + for (Map.Entry entry + : frame.corVarOutputPos.entrySet()) { + projects.add(RexInputRef.of2(entry.getValue(), newInputOutput)); + + mapCorVarToOutputPos.put(entry.getKey(), newPos); + mapNewInputToProjOutputPos.put(entry.getValue(), newPos); + newPos++; + } + } + + // add the remaining fields + final int newGroupKeyCount = newPos; + for (int i = 0; i < newInputOutput.size(); i++) { + if (!mapNewInputToProjOutputPos.containsKey(i)) { + projects.add(RexInputRef.of2(i, newInputOutput)); + mapNewInputToProjOutputPos.put(i, newPos); + newPos++; + } + } + + assert newPos == newInputOutput.size(); + + // This Project will be what the old input maps to, + // replacing any previous mapping from old input). 
+ + RelNode newProject = HiveProject.create(newInput, Pair.left(projects), Pair.right(projects)); + + // update mappings: + // oldInput ----> newInput + // + // newProject + // | + // oldInput ----> newInput + // + // is transformed to + // + // oldInput ----> newProject + // | + // newInput + Map combinedMap = Maps.newHashMap(); + + for (Integer oldInputPos : frame.oldToNewOutputPos.keySet()) { + combinedMap.put(oldInputPos, + mapNewInputToProjOutputPos.get( + frame.oldToNewOutputPos.get(oldInputPos))); + } + + register(oldInput, newProject, combinedMap, mapCorVarToOutputPos); + + // now it's time to rewrite the Aggregate + final ImmutableBitSet newGroupSet = ImmutableBitSet.range(newGroupKeyCount); + List newAggCalls = Lists.newArrayList(); + List oldAggCalls = rel.getAggCallList(); + + int oldInputOutputFieldCount = rel.getGroupSet().cardinality(); + int newInputOutputFieldCount = newGroupSet.cardinality(); + + int i = -1; + for (AggregateCall oldAggCall : oldAggCalls) { + ++i; + List oldAggArgs = oldAggCall.getArgList(); + + List aggArgs = Lists.newArrayList(); + + // Adjust the aggregator argument positions. + // Note aggregator does not change input ordering, so the input + // output position mapping can be used to derive the new positions + // for the argument. + for (int oldPos : oldAggArgs) { + aggArgs.add(combinedMap.get(oldPos)); + } + final int filterArg = oldAggCall.filterArg < 0 ? oldAggCall.filterArg + : combinedMap.get(oldAggCall.filterArg); + + newAggCalls.add( + oldAggCall.adaptTo(newProject, aggArgs, filterArg, + oldGroupKeyCount, newGroupKeyCount)); + + // The old to new output position mapping will be the same as that + // of newProject, plus any aggregates that the oldAgg produces. 
+ combinedMap.put( + oldInputOutputFieldCount + i, + newInputOutputFieldCount + i); + } + + relBuilder.push( + LogicalAggregate.create(newProject, + false, + newGroupSet, + null, + newAggCalls)); + + if (!omittedConstants.isEmpty()) { + final List postProjects = new ArrayList<>(relBuilder.fields()); + for (Map.Entry entry + : omittedConstants.descendingMap().entrySet()) { + postProjects.add(entry.getKey() + frame.corVarOutputPos.size(), + entry.getValue()); + } + relBuilder.project(postProjects); + } + + // Aggregate does not change input ordering so corVars will be + // located at the same position as the input newProject. + return register(rel, relBuilder.build(), combinedMap, mapCorVarToOutputPos); + } + + public Frame getInvoke(RelNode r, RelNode parent) { + final Frame frame = dispatcher.invoke(r); + if (frame != null) { + map.put(r, frame); + } + currentRel = parent; + return frame; + } + + /** Returns a literal output field, or null if it is not literal. */ + private static RexLiteral projectedLiteral(RelNode rel, int i) { + if (rel instanceof Project) { + final Project project = (Project) rel; + final RexNode node = project.getProjects().get(i); + if (node instanceof RexLiteral) { + return (RexLiteral) node; + } + } + return null; + } + + public Frame decorrelateRel(HiveAggregate rel) throws SemanticException{ + { + if (rel.getGroupType() != Aggregate.Group.SIMPLE) { + throw new AssertionError(Bug.CALCITE_461_FIXED); + } + // + // Rewrite logic: + // + // 1. Permute the group by keys to the front. + // 2. If the input of an aggregate produces correlated variables, + // add them to the group list. + // 3. Change aggCalls to reference the new project. + // + + // Aggregate itself should not reference cor vars. + assert !cm.mapRefRelToCorVar.containsKey(rel); + + final RelNode oldInput = rel.getInput(); + final Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. 
+ return null; + } + //assert !frame.corVarOutputPos.isEmpty(); + final RelNode newInput = frame.r; + + // map from newInput + Map mapNewInputToProjOutputPos = Maps.newHashMap(); + final int oldGroupKeyCount = rel.getGroupSet().cardinality(); + + // Project projects the original expressions, + // plus any correlated variables the input wants to pass along. + final List> projects = Lists.newArrayList(); + + List newInputOutput = + newInput.getRowType().getFieldList(); + + int newPos = 0; + + // oldInput has the original group by keys in the front. + final NavigableMap omittedConstants = new TreeMap<>(); + for (int i = 0; i < oldGroupKeyCount; i++) { + final RexLiteral constant = projectedLiteral(newInput, i); + if (constant != null) { + // Exclude constants. Aggregate({true}) occurs because Aggregate({}) + // would generate 1 row even when applied to an empty table. + omittedConstants.put(i, constant); + continue; + } + int newInputPos = frame.oldToNewOutputPos.get(i); + projects.add(RexInputRef.of2(newInputPos, newInputOutput)); + mapNewInputToProjOutputPos.put(newInputPos, newPos); + newPos++; + } + + final SortedMap mapCorVarToOutputPos = new TreeMap<>(); + if (!frame.corVarOutputPos.isEmpty()) { + // If input produces correlated variables, move them to the front, + // right after any existing GROUP BY fields. + + // Now add the corVars from the input, starting from + // position oldGroupKeyCount. 
+ for (Map.Entry entry + : frame.corVarOutputPos.entrySet()) { + projects.add(RexInputRef.of2(entry.getValue(), newInputOutput)); + + mapCorVarToOutputPos.put(entry.getKey(), newPos); + mapNewInputToProjOutputPos.put(entry.getValue(), newPos); + newPos++; + } + } + + // add the remaining fields + final int newGroupKeyCount = newPos; + for (int i = 0; i < newInputOutput.size(); i++) { + if (!mapNewInputToProjOutputPos.containsKey(i)) { + projects.add(RexInputRef.of2(i, newInputOutput)); + mapNewInputToProjOutputPos.put(i, newPos); + newPos++; + } + } + + assert newPos == newInputOutput.size(); + + // This Project will be what the old input maps to, + // replacing any previous mapping from old input). + RelNode newProject = HiveProject.create(newInput, Pair.left(projects), Pair.right(projects)); + + // update mappings: + // oldInput ----> newInput + // + // newProject + // | + // oldInput ----> newInput + // + // is transformed to + // + // oldInput ----> newProject + // | + // newInput + Map combinedMap = Maps.newHashMap(); + + for (Integer oldInputPos : frame.oldToNewOutputPos.keySet()) { + combinedMap.put(oldInputPos, + mapNewInputToProjOutputPos.get( + frame.oldToNewOutputPos.get(oldInputPos))); + } + + register(oldInput, newProject, combinedMap, mapCorVarToOutputPos); + + // now it's time to rewrite the Aggregate + final ImmutableBitSet newGroupSet = ImmutableBitSet.range(newGroupKeyCount); + List newAggCalls = Lists.newArrayList(); + List oldAggCalls = rel.getAggCallList(); + + int oldInputOutputFieldCount = rel.getGroupSet().cardinality(); + int newInputOutputFieldCount = newGroupSet.cardinality(); + + int i = -1; + for (AggregateCall oldAggCall : oldAggCalls) { + ++i; + List oldAggArgs = oldAggCall.getArgList(); + + List aggArgs = Lists.newArrayList(); + + // Adjust the aggregator argument positions. + // Note aggregator does not change input ordering, so the input + // output position mapping can be used to derive the new positions + // for the argument. 
+ for (int oldPos : oldAggArgs) { + aggArgs.add(combinedMap.get(oldPos)); + } + final int filterArg = oldAggCall.filterArg < 0 ? oldAggCall.filterArg + : combinedMap.get(oldAggCall.filterArg); + + newAggCalls.add( + oldAggCall.adaptTo(newProject, aggArgs, filterArg, + oldGroupKeyCount, newGroupKeyCount)); + + // The old to new output position mapping will be the same as that + // of newProject, plus any aggregates that the oldAgg produces. + combinedMap.put( + oldInputOutputFieldCount + i, + newInputOutputFieldCount + i); + } + + relBuilder.push( + new HiveAggregate(rel.getCluster(), rel.getTraitSet(), newProject, false, newGroupSet, null, newAggCalls) ); + + if (!omittedConstants.isEmpty()) { + final List postProjects = new ArrayList<>(relBuilder.fields()); + for (Map.Entry entry + : omittedConstants.descendingMap().entrySet()) { + postProjects.add(entry.getKey() + frame.corVarOutputPos.size(), + entry.getValue()); + } + relBuilder.project(postProjects); + } + + // Aggregate does not change input ordering so corVars will be + // located at the same position as the input newProject. + return register(rel, relBuilder.build(), combinedMap, mapCorVarToOutputPos); + } + } + + public Frame decorrelateRel(HiveProject rel) throws SemanticException{ + { + // + // Rewrite logic: + // + // 1. Pass along any correlated variables coming from the input. + // + + final RelNode oldInput = rel.getInput(); + Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + final List oldProjects = rel.getProjects(); + final List relOutput = rel.getRowType().getFieldList(); + + // LogicalProject projects the original expressions, + // plus any correlated variables the input wants to pass along. + final List> projects = Lists.newArrayList(); + + // If this LogicalProject has correlated reference, create value generator + // and produce the correlated variables in the new output. 
+ if (cm.mapRefRelToCorVar.containsKey(rel)) { + decorrelateInputWithValueGenerator(rel); + + // The old input should be mapped to the LogicalJoin created by + // rewriteInputWithValueGenerator(). + frame = map.get(oldInput); + } + + // LogicalProject projects the original expressions + final Map mapOldToNewOutputPos = Maps.newHashMap(); + int newPos; + for (newPos = 0; newPos < oldProjects.size(); newPos++) { + projects.add( + newPos, + Pair.of( + decorrelateExpr(oldProjects.get(newPos)), + relOutput.get(newPos).getName())); + mapOldToNewOutputPos.put(newPos, newPos); + } + + + // Project any correlated variables the input wants to pass along. + // There could be situation e.g. multiple correlated variables refering to + // same outer variable, in which case Project will be created with multiple + // fields with same name. Hive doesn't allow HiveProject with multiple fields + // having same name. So to avoid that we keep a set of all fieldnames and + // on encountering an existing one a new field/column name is generated + final Set corrFieldName = Sets.newHashSet(); + int pos = 0; + + final SortedMap mapCorVarToOutputPos = new TreeMap<>(); + for (Map.Entry entry : frame.corVarOutputPos.entrySet()) { + final RelDataTypeField field = frame.r.getRowType().getFieldList().get(entry.getValue()); + RexNode projectChild = (RexNode) new RexInputRef(entry.getValue(), field.getType()); + String fieldName = field.getName(); + if(corrFieldName.contains(fieldName)) + { + fieldName = SemanticAnalyzer.getColumnInternalName(pos++); + } + + projects.add(Pair.of(projectChild ,fieldName)); + corrFieldName.add(fieldName); + mapCorVarToOutputPos.put(entry.getKey(), newPos); + newPos++; + } + + RelNode newProject = HiveProject.create(frame.r, Pair.left(projects), Pair.right(projects)); + + return register(rel, newProject, mapOldToNewOutputPos, + mapCorVarToOutputPos); + } + } + /** + * Rewrite LogicalProject. 
+ * + * @param rel the project rel to rewrite + */ + public Frame decorrelateRel(LogicalProject rel) throws SemanticException{ + // + // Rewrite logic: + // + // 1. Pass along any correlated variables coming from the input. + // + + final RelNode oldInput = rel.getInput(); + Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + final List oldProjects = rel.getProjects(); + final List relOutput = rel.getRowType().getFieldList(); + + // LogicalProject projects the original expressions, + // plus any correlated variables the input wants to pass along. + final List> projects = Lists.newArrayList(); + + // If this LogicalProject has correlated reference, create value generator + // and produce the correlated variables in the new output. + if (cm.mapRefRelToCorVar.containsKey(rel)) { + decorrelateInputWithValueGenerator(rel); + + // The old input should be mapped to the LogicalJoin created by + // rewriteInputWithValueGenerator(). + frame = map.get(oldInput); + } + + // LogicalProject projects the original expressions + final Map mapOldToNewOutputPos = Maps.newHashMap(); + int newPos; + for (newPos = 0; newPos < oldProjects.size(); newPos++) { + projects.add( + newPos, + Pair.of( + decorrelateExpr(oldProjects.get(newPos)), + relOutput.get(newPos).getName())); + mapOldToNewOutputPos.put(newPos, newPos); + } + + // Project any correlated variables the input wants to pass along. 
+ final SortedMap mapCorVarToOutputPos = new TreeMap<>(); + for (Map.Entry entry : frame.corVarOutputPos.entrySet()) { + projects.add( + RexInputRef.of2(entry.getValue(), + frame.r.getRowType().getFieldList())); + mapCorVarToOutputPos.put(entry.getKey(), newPos); + newPos++; + } + + RelNode newProject = HiveProject.create(frame.r, Pair.left(projects), Pair.right(projects)); + + return register(rel, newProject, mapOldToNewOutputPos, + mapCorVarToOutputPos); + } + + /** + * Create RelNode tree that produces a list of correlated variables. + * + * @param correlations correlated variables to generate + * @param valueGenFieldOffset offset in the output that generated columns + * will start + * @param mapCorVarToOutputPos output positions for the correlated variables + * generated + * @return RelNode the root of the resultant RelNode tree + */ + private RelNode createValueGenerator( + Iterable correlations, + int valueGenFieldOffset, + SortedMap mapCorVarToOutputPos) { + final Map> mapNewInputToOutputPos = + new HashMap<>(); + + final Map mapNewInputToNewOffset = new HashMap<>(); + + // Input provides the definition of a correlated variable. + // Add to map all the referenced positions (relative to each input rel). + for (Correlation corVar : correlations) { + final int oldCorVarOffset = corVar.field; + + final RelNode oldInput = getCorRel(corVar); + assert oldInput != null; + final Frame frame = map.get(oldInput); + assert frame != null; + final RelNode newInput = frame.r; + + final List newLocalOutputPosList; + if (!mapNewInputToOutputPos.containsKey(newInput)) { + newLocalOutputPosList = Lists.newArrayList(); + } else { + newLocalOutputPosList = + mapNewInputToOutputPos.get(newInput); + } + + final int newCorVarOffset = frame.oldToNewOutputPos.get(oldCorVarOffset); + + // Add all unique positions referenced. 
+ if (!newLocalOutputPosList.contains(newCorVarOffset)) { + newLocalOutputPosList.add(newCorVarOffset); + } + mapNewInputToOutputPos.put(newInput, newLocalOutputPosList); + } + + int offset = 0; + + // Project only the correlated fields out of each inputRel + // and join the projectRel together. + // To make sure the plan does not change in terms of join order, + // join these rels based on their occurrence in cor var list which + // is sorted. + final Set joinedInputRelSet = Sets.newHashSet(); + + RelNode r = null; + for (Correlation corVar : correlations) { + final RelNode oldInput = getCorRel(corVar); + assert oldInput != null; + final RelNode newInput = map.get(oldInput).r; + assert newInput != null; + + if (!joinedInputRelSet.contains(newInput)) { + RelNode project = + RelOptUtil.createProject( + newInput, + mapNewInputToOutputPos.get(newInput)); + RelNode distinct = RelOptUtil.createDistinctRel(project); + RelOptCluster cluster = distinct.getCluster(); + + joinedInputRelSet.add(newInput); + mapNewInputToNewOffset.put(newInput, offset); + offset += distinct.getRowType().getFieldCount(); + + if (r == null) { + r = distinct; + } else { + r = + LogicalJoin.create(r, distinct, + cluster.getRexBuilder().makeLiteral(true), + ImmutableSet.of(), JoinRelType.INNER); + } + } + } + + // Translate the positions of correlated variables to be relative to + // the join output, leaving room for valueGenFieldOffset because + // valueGenerators are joined with the original left input of the rel + // referencing correlated variables. + for (Correlation corVar : correlations) { + // The first input of a Correlator is always the rel defining + // the correlated variables. 
+ final RelNode oldInput = getCorRel(corVar); + assert oldInput != null; + final Frame frame = map.get(oldInput); + final RelNode newInput = frame.r; + assert newInput != null; + + final List newLocalOutputPosList = + mapNewInputToOutputPos.get(newInput); + + final int newLocalOutputPos = frame.oldToNewOutputPos.get(corVar.field); + + // newOutputPos is the index of the cor var in the referenced + // position list plus the offset of referenced position list of + // each newInput. + final int newOutputPos = + newLocalOutputPosList.indexOf(newLocalOutputPos) + + mapNewInputToNewOffset.get(newInput) + + valueGenFieldOffset; + + if (mapCorVarToOutputPos.containsKey(corVar)) { + assert mapCorVarToOutputPos.get(corVar) == newOutputPos; + } + mapCorVarToOutputPos.put(corVar, newOutputPos); + } + + return r; + } + + + //this returns the source of corVar i.e. Rel which produces cor var + // value. Therefore it is always LogicalCorrelate's left input which is outer query + private RelNode getCorRel(Correlation corVar) { + final RelNode r = cm.mapCorVarToCorRel.get(corVar.corr); + + RelNode ret = r.getInput(0); + return ret; + } + + private void decorrelateInputWithValueGenerator(RelNode rel) { + // currently only handles one input input + assert rel.getInputs().size() == 1; + RelNode oldInput = rel.getInput(0); + final Frame frame = map.get(oldInput); + + final SortedMap mapCorVarToOutputPos = + new TreeMap<>(frame.corVarOutputPos); + + final Collection corVarList = cm.mapRefRelToCorVar.get(rel); + + int leftInputOutputCount = frame.r.getRowType().getFieldCount(); + + // can directly add positions into mapCorVarToOutputPos since join + // does not change the output ordering from the inputs. 
+ RelNode valueGen = + createValueGenerator( + corVarList, + leftInputOutputCount, + mapCorVarToOutputPos); + + RelNode join = + LogicalJoin.create(frame.r, valueGen, rexBuilder.makeLiteral(true), + ImmutableSet.of(), JoinRelType.INNER); + + // LogicalJoin or LogicalFilter does not change the old input ordering. All + // input fields from newLeftInput(i.e. the original input to the old + // LogicalFilter) are in the output and in the same position. + register(oldInput, join, frame.oldToNewOutputPos, mapCorVarToOutputPos); + } + + public Frame decorrelateRel(HiveFilter rel) throws SemanticException { + { + // + // Rewrite logic: + // + // 1. If a LogicalFilter references a correlated field in its filter + // condition, rewrite the LogicalFilter to be + // LogicalFilter + // LogicalJoin(cross product) + // OriginalFilterInput + // ValueGenerator(produces distinct sets of correlated variables) + // and rewrite the correlated fieldAccess in the filter condition to + // reference the LogicalJoin output. + // + // 2. If LogicalFilter does not reference correlated variables, simply + // rewrite the filter condition using new input. + // + + final RelNode oldInput = rel.getInput(); + Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + + // If this LogicalFilter has correlated reference, create value generator + // and produce the correlated variables in the new output. + if (cm.mapRefRelToCorVar.containsKey(rel)) { + decorrelateInputWithValueGenerator(rel); + + // The old input should be mapped to the newly created LogicalJoin by + // rewriteInputWithValueGenerator(). 
+ frame = map.get(oldInput); + } + + // Replace the filter expression to reference output of the join + // Map filter to the new filter over join + RelNode newFilter = new HiveFilter(rel.getCluster(), rel.getTraitSet(), frame.r, + decorrelateExpr(rel.getCondition())); + + // Filter does not change the input ordering. + // Filter rel does not permute the input. + // All corvars produced by filter will have the same output positions in the + // input rel. + return register(rel, newFilter, frame.oldToNewOutputPos, + frame.corVarOutputPos); + } + } + + /** + * Rewrite LogicalFilter. + * + * @param rel the filter rel to rewrite + */ + public Frame decorrelateRel(LogicalFilter rel) { + // + // Rewrite logic: + // + // 1. If a LogicalFilter references a correlated field in its filter + // condition, rewrite the LogicalFilter to be + // LogicalFilter + // LogicalJoin(cross product) + // OriginalFilterInput + // ValueGenerator(produces distinct sets of correlated variables) + // and rewrite the correlated fieldAccess in the filter condition to + // reference the LogicalJoin output. + // + // 2. If LogicalFilter does not reference correlated variables, simply + // rewrite the filter condition using new input. + // + + final RelNode oldInput = rel.getInput(); + Frame frame = getInvoke(oldInput, rel); + if (frame == null) { + // If input has not been rewritten, do not rewrite this rel. + return null; + } + + // If this LogicalFilter has correlated reference, create value generator + // and produce the correlated variables in the new output. + if (cm.mapRefRelToCorVar.containsKey(rel)) { + decorrelateInputWithValueGenerator(rel); + + // The old input should be mapped to the newly created LogicalJoin by + // rewriteInputWithValueGenerator(). 
+ frame = map.get(oldInput); + } + + // Replace the filter expression to reference output of the join + // Map filter to the new filter over join + RelNode newFilter = new HiveFilter(rel.getCluster(), rel.getTraitSet(), frame.r, + decorrelateExpr(rel.getCondition())); + + + // Filter does not change the input ordering. + // Filter rel does not permute the input. + // All corvars produced by filter will have the same output positions in the + // input rel. + return register(rel, newFilter, frame.oldToNewOutputPos, + frame.corVarOutputPos); + } + + /** + * Rewrite Correlator into a left outer join. + * + * @param rel Correlator + */ + public Frame decorrelateRel(LogicalCorrelate rel) { + // + // Rewrite logic: + // + // The original left input will be joined with the new right input that + // has generated correlated variables propagated up. For any generated + // cor vars that are not used in the join key, pass them along to be + // joined later with the CorrelatorRels that produce them. + // + + // the right input to Correlator should produce correlated variables + final RelNode oldLeft = rel.getInput(0); + final RelNode oldRight = rel.getInput(1); + + final Frame leftFrame = getInvoke(oldLeft, rel); + final Frame rightFrame = getInvoke(oldRight, rel); + + if (leftFrame == null || rightFrame == null) { + // If any input has not been rewritten, do not rewrite this rel. + return null; + } + + if (rightFrame.corVarOutputPos.isEmpty()) { + return null; + } + + assert rel.getRequiredColumns().cardinality() + <= rightFrame.corVarOutputPos.keySet().size(); + + // Change correlator rel into a join. 
+ // Join all the correlated variables produced by this correlator rel + // with the values generated and propagated from the right input + final SortedMap corVarOutputPos = + new TreeMap<>(rightFrame.corVarOutputPos); + final List conditions = new ArrayList<>(); + final List newLeftOutput = + leftFrame.r.getRowType().getFieldList(); + int newLeftFieldCount = newLeftOutput.size(); + + final List newRightOutput = + rightFrame.r.getRowType().getFieldList(); + + for (Map.Entry rightOutputPos + : Lists.newArrayList(corVarOutputPos.entrySet())) { + final Correlation corVar = rightOutputPos.getKey(); + if (!corVar.corr.equals(rel.getCorrelationId())) { + continue; + } + final int newLeftPos = leftFrame.oldToNewOutputPos.get(corVar.field); + final int newRightPos = rightOutputPos.getValue(); + conditions.add( + rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + RexInputRef.of(newLeftPos, newLeftOutput), + new RexInputRef(newLeftFieldCount + newRightPos, + newRightOutput.get(newRightPos).getType()))); + + // remove this cor var from output position mapping + corVarOutputPos.remove(corVar); + } + + // Update the output position for the cor vars: only pass on the cor + // vars that are not used in the join key. + for (Correlation corVar : corVarOutputPos.keySet()) { + int newPos = corVarOutputPos.get(corVar) + newLeftFieldCount; + corVarOutputPos.put(corVar, newPos); + } + + // then add any cor var from the left input. Do not need to change + // output positions. + corVarOutputPos.putAll(leftFrame.corVarOutputPos); + + // Create the mapping between the output of the old correlation rel + // and the new join rel + final Map mapOldToNewOutputPos = Maps.newHashMap(); + + int oldLeftFieldCount = oldLeft.getRowType().getFieldCount(); + + int oldRightFieldCount = oldRight.getRowType().getFieldCount(); + assert rel.getRowType().getFieldCount() + == oldLeftFieldCount + oldRightFieldCount; + + // Left input positions are not changed. 
+ mapOldToNewOutputPos.putAll(leftFrame.oldToNewOutputPos); + + // Right input positions are shifted by newLeftFieldCount. + for (int i = 0; i < oldRightFieldCount; i++) { + mapOldToNewOutputPos.put( + i + oldLeftFieldCount, + rightFrame.oldToNewOutputPos.get(i) + newLeftFieldCount); + } + + final RexNode condition = + RexUtil.composeConjunction(rexBuilder, conditions, false); + RelNode newJoin = + LogicalJoin.create(leftFrame.r, rightFrame.r, condition, + ImmutableSet.of(), rel.getJoinType().toJoinType()); + + return register(rel, newJoin, mapOldToNewOutputPos, corVarOutputPos); + } + + public Frame decorrelateRel(HiveJoin rel) throws SemanticException{ + // + // Rewrite logic: + // + // 1. rewrite join condition. + // 2. map output positions and produce cor vars if any. + // + + final RelNode oldLeft = rel.getInput(0); + final RelNode oldRight = rel.getInput(1); + + final Frame leftFrame = getInvoke(oldLeft, rel); + final Frame rightFrame = getInvoke(oldRight, rel); + + if (leftFrame == null || rightFrame == null) { + // If any input has not been rewritten, do not rewrite this rel. + return null; + } + + final RelNode newJoin = HiveJoin.getJoin(rel.getCluster(), leftFrame.r, rightFrame.r, decorrelateExpr(rel.getCondition()), rel.getJoinType() ); + + // Create the mapping between the output of the old correlation rel + // and the new join rel + Map mapOldToNewOutputPos = Maps.newHashMap(); + + int oldLeftFieldCount = oldLeft.getRowType().getFieldCount(); + int newLeftFieldCount = leftFrame.r.getRowType().getFieldCount(); + + int oldRightFieldCount = oldRight.getRowType().getFieldCount(); + assert rel.getRowType().getFieldCount() + == oldLeftFieldCount + oldRightFieldCount; + + // Left input positions are not changed. + mapOldToNewOutputPos.putAll(leftFrame.oldToNewOutputPos); + + // Right input positions are shifted by newLeftFieldCount. 
+ for (int i = 0; i < oldRightFieldCount; i++) { + mapOldToNewOutputPos.put(i + oldLeftFieldCount, + rightFrame.oldToNewOutputPos.get(i) + newLeftFieldCount); + } + + final SortedMap mapCorVarToOutputPos = + new TreeMap<>(leftFrame.corVarOutputPos); + + // Right input positions are shifted by newLeftFieldCount. + for (Map.Entry entry + : rightFrame.corVarOutputPos.entrySet()) { + mapCorVarToOutputPos.put(entry.getKey(), + entry.getValue() + newLeftFieldCount); + } + return register(rel, newJoin, mapOldToNewOutputPos, mapCorVarToOutputPos); + } + /** + * Rewrite LogicalJoin. + * + * @param rel LogicalJoin + */ + public Frame decorrelateRel(LogicalJoin rel) { + // + // Rewrite logic: + // + // 1. rewrite join condition. + // 2. map output positions and produce cor vars if any. + // + + final RelNode oldLeft = rel.getInput(0); + final RelNode oldRight = rel.getInput(1); + + final Frame leftFrame = getInvoke(oldLeft, rel); + final Frame rightFrame = getInvoke(oldRight, rel); + + if (leftFrame == null || rightFrame == null) { + // If any input has not been rewritten, do not rewrite this rel. + return null; + } + + final RelNode newJoin = HiveJoin.getJoin(rel.getCluster(), leftFrame.r, + rightFrame.r, decorrelateExpr(rel.getCondition()), rel.getJoinType() ); + + // Create the mapping between the output of the old correlation rel + // and the new join rel + Map mapOldToNewOutputPos = Maps.newHashMap(); + + int oldLeftFieldCount = oldLeft.getRowType().getFieldCount(); + int newLeftFieldCount = leftFrame.r.getRowType().getFieldCount(); + + int oldRightFieldCount = oldRight.getRowType().getFieldCount(); + assert rel.getRowType().getFieldCount() + == oldLeftFieldCount + oldRightFieldCount; + + // Left input positions are not changed. + mapOldToNewOutputPos.putAll(leftFrame.oldToNewOutputPos); + + // Right input positions are shifted by newLeftFieldCount. 
+ for (int i = 0; i < oldRightFieldCount; i++) { + mapOldToNewOutputPos.put(i + oldLeftFieldCount, + rightFrame.oldToNewOutputPos.get(i) + newLeftFieldCount); + } + + final SortedMap mapCorVarToOutputPos = + new TreeMap<>(leftFrame.corVarOutputPos); + + // Right input positions are shifted by newLeftFieldCount. + for (Map.Entry entry + : rightFrame.corVarOutputPos.entrySet()) { + mapCorVarToOutputPos.put(entry.getKey(), + entry.getValue() + newLeftFieldCount); + } + return register(rel, newJoin, mapOldToNewOutputPos, mapCorVarToOutputPos); + } + + private RexInputRef getNewForOldInputRef(RexInputRef oldInputRef) { + assert currentRel != null; + + int oldOrdinal = oldInputRef.getIndex(); + int newOrdinal = 0; + + // determine which input rel oldOrdinal references, and adjust + // oldOrdinal to be relative to that input rel + RelNode oldInput = null; + + for (RelNode oldInput0 : currentRel.getInputs()) { + RelDataType oldInputType = oldInput0.getRowType(); + int n = oldInputType.getFieldCount(); + if (oldOrdinal < n) { + oldInput = oldInput0; + break; + } + RelNode newInput = map.get(oldInput0).r; + newOrdinal += newInput.getRowType().getFieldCount(); + oldOrdinal -= n; + } + + assert oldInput != null; + + final Frame frame = map.get(oldInput); + assert frame != null; + + // now oldOrdinal is relative to oldInput + int oldLocalOrdinal = oldOrdinal; + + // figure out the newLocalOrdinal, relative to the newInput. + int newLocalOrdinal = oldLocalOrdinal; + + if (!frame.oldToNewOutputPos.isEmpty()) { + newLocalOrdinal = frame.oldToNewOutputPos.get(oldLocalOrdinal); + } + + newOrdinal += newLocalOrdinal; + + return new RexInputRef(newOrdinal, + frame.r.getRowType().getFieldList().get(newLocalOrdinal).getType()); + } + + /** + * Pulls project above the join from its RHS input. Enforces nullability + * for join output. 
+ * + * @param join Join + * @param project Original project as the right-hand input of the join + * @param nullIndicatorPos Position of null indicator + * @return the subtree with the new LogicalProject at the root + */ + private RelNode projectJoinOutputWithNullability( + LogicalJoin join, + LogicalProject project, + int nullIndicatorPos) { + final RelDataTypeFactory typeFactory = join.getCluster().getTypeFactory(); + final RelNode left = join.getLeft(); + final JoinRelType joinType = join.getJoinType(); + + RexInputRef nullIndicator = + new RexInputRef( + nullIndicatorPos, + typeFactory.createTypeWithNullability( + join.getRowType().getFieldList().get(nullIndicatorPos) + .getType(), + true)); + + // now create the new project + List> newProjExprs = Lists.newArrayList(); + + // project everything from the LHS and then those from the original + // projRel + List leftInputFields = + left.getRowType().getFieldList(); + + for (int i = 0; i < leftInputFields.size(); i++) { + newProjExprs.add(RexInputRef.of2(i, leftInputFields)); + } + + // Marked where the projected expr is coming from so that the types will + // become nullable for the original projections which are now coming out + // of the nullable side of the OJ. + boolean projectPulledAboveLeftCorrelator = + joinType.generatesNullsOnRight(); + + for (Pair pair : project.getNamedProjects()) { + RexNode newProjExpr = + removeCorrelationExpr( + pair.left, + projectPulledAboveLeftCorrelator, + nullIndicator); + + newProjExprs.add(Pair.of(newProjExpr, pair.right)); + } + + return RelOptUtil.createProject(join, newProjExprs, false); + } + + /** + * Pulls a {@link Project} above a {@link Correlate} from its RHS input. + * Enforces nullability for join output. 
+ * + * @param correlate Correlate + * @param project the original project as the RHS input of the join + * @param isCount Positions which are calls to the COUNT + * aggregation function + * @return the subtree with the new LogicalProject at the root + */ + private RelNode aggregateCorrelatorOutput( + Correlate correlate, + LogicalProject project, + Set isCount) { + final RelNode left = correlate.getLeft(); + final JoinRelType joinType = correlate.getJoinType().toJoinType(); + + // now create the new project + final List> newProjects = Lists.newArrayList(); + + // Project everything from the LHS and then those from the original + // project + final List leftInputFields = + left.getRowType().getFieldList(); + + for (int i = 0; i < leftInputFields.size(); i++) { + newProjects.add(RexInputRef.of2(i, leftInputFields)); + } + + // Marked where the projected expr is coming from so that the types will + // become nullable for the original projections which are now coming out + // of the nullable side of the OJ. + boolean projectPulledAboveLeftCorrelator = + joinType.generatesNullsOnRight(); + + for (Pair pair : project.getNamedProjects()) { + RexNode newProjExpr = + removeCorrelationExpr( + pair.left, + projectPulledAboveLeftCorrelator, + isCount); + newProjects.add(Pair.of(newProjExpr, pair.right)); + } + + return RelOptUtil.createProject(correlate, newProjects, false); + } + + /** + * Checks whether the correlations in projRel and filter are related to + * the correlated variables provided by corRel. 
+ * + * @param correlate Correlate + * @param project The original Project as the RHS input of the join + * @param filter Filter + * @param correlatedJoinKeys Correlated join keys + * @return true if filter and proj only references corVar provided by corRel + */ + private boolean checkCorVars( + LogicalCorrelate correlate, + LogicalProject project, + LogicalFilter filter, + List correlatedJoinKeys) { + if (filter != null) { + assert correlatedJoinKeys != null; + + // check that all correlated refs in the filter condition are + // used in the join(as field access). + Set corVarInFilter = + Sets.newHashSet(cm.mapRefRelToCorVar.get(filter)); + + for (RexFieldAccess correlatedJoinKey : correlatedJoinKeys) { + corVarInFilter.remove(cm.mapFieldAccessToCorVar.get(correlatedJoinKey)); + } + + if (!corVarInFilter.isEmpty()) { + return false; + } + + // Check that the correlated variables referenced in these + // comparisons do come from the correlatorRel. + corVarInFilter.addAll(cm.mapRefRelToCorVar.get(filter)); + + for (Correlation corVar : corVarInFilter) { + if (cm.mapCorVarToCorRel.get(corVar.corr) != correlate) { + return false; + } + } + } + + // if project has any correlated reference, make sure they are also + // provided by the current correlate. They will be projected out of the LHS + // of the correlate. + if ((project != null) && cm.mapRefRelToCorVar.containsKey(project)) { + for (Correlation corVar : cm.mapRefRelToCorVar.get(project)) { + if (cm.mapCorVarToCorRel.get(corVar.corr) != correlate) { + return false; + } + } + } + + return true; + } + + /** + * Remove correlated variables from the tree at root corRel + * + * @param correlate Correlator + */ + private void removeCorVarFromTree(LogicalCorrelate correlate) { + if (cm.mapCorVarToCorRel.get(correlate.getCorrelationId()) == correlate) { + cm.mapCorVarToCorRel.remove(correlate.getCorrelationId()); + } + } + + /** + * Projects all {@code input} output fields plus the additional expressions. 
+ * + * @param input Input relational expression + * @param additionalExprs Additional expressions and names + * @return the new LogicalProject + */ + private RelNode createProjectWithAdditionalExprs( + RelNode input, + List> additionalExprs) { + final List fieldList = + input.getRowType().getFieldList(); + List> projects = Lists.newArrayList(); + for (Ord field : Ord.zip(fieldList)) { + projects.add( + Pair.of( + (RexNode) rexBuilder.makeInputRef( + field.e.getType(), field.i), + field.e.getName())); + } + projects.addAll(additionalExprs); + return RelOptUtil.createProject(input, projects, false); + } + + /* Returns an immutable map with the identity [0: 0, .., count-1: count-1]. */ + static Map identityMap(int count) { + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (int i = 0; i < count; i++) { + builder.put(i, i); + } + return builder.build(); + } + + /** Registers a relational expression and the relational expression it became + * after decorrelation. */ + Frame register(RelNode rel, RelNode newRel, + Map oldToNewOutputPos, + SortedMap corVarToOutputPos) { + assert allLessThan(oldToNewOutputPos.keySet(), + newRel.getRowType().getFieldCount(), Litmus.THROW); + final Frame frame = new Frame(newRel, corVarToOutputPos, oldToNewOutputPos); + map.put(rel, frame); + return frame; + } + + static boolean allLessThan(Collection integers, int limit, + Litmus ret) { + for (int value : integers) { + if (value >= limit) { + return ret.fail("out of range; value: " + value + ", limit: " + limit); + } + } + return ret.succeed(); + } + + private static RelNode stripHep(RelNode rel) { + if (rel instanceof HepRelVertex) { + HepRelVertex hepRelVertex = (HepRelVertex) rel; + rel = hepRelVertex.getCurrentRel(); + } + return rel; + } + + //~ Inner Classes ---------------------------------------------------------- + + /** Shuttle that decorrelates. 
*/ + private class DecorrelateRexShuttle extends RexShuttle { + @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) { + int newInputOutputOffset = 0; + for (RelNode input : currentRel.getInputs()) { + final Frame frame = map.get(input); + + if (frame != null) { + // try to find in this input rel the position of cor var + final Correlation corVar = cm.mapFieldAccessToCorVar.get(fieldAccess); + + if (corVar != null) { + Integer newInputPos = frame.corVarOutputPos.get(corVar); + if (newInputPos != null) { + // This input rel does produce the cor var referenced. + // Assume fieldAccess has the correct type info. + return new RexInputRef(newInputPos + newInputOutputOffset, + fieldAccess.getType()); + } + } + + // this input rel does not produce the cor var needed + newInputOutputOffset += frame.r.getRowType().getFieldCount(); + } else { + // this input rel is not rewritten + newInputOutputOffset += input.getRowType().getFieldCount(); + } + } + return fieldAccess; + } + + @Override public RexNode visitInputRef(RexInputRef inputRef) { + return getNewForOldInputRef(inputRef); + } + } + + /** Shuttle that removes correlations. 
*/ + private class RemoveCorrelationRexShuttle extends RexShuttle { + final RexBuilder rexBuilder; + final RelDataTypeFactory typeFactory; + final boolean projectPulledAboveLeftCorrelator; + final RexInputRef nullIndicator; + final ImmutableSet isCount; + + public RemoveCorrelationRexShuttle( + RexBuilder rexBuilder, + boolean projectPulledAboveLeftCorrelator, + RexInputRef nullIndicator, + Set isCount) { + this.projectPulledAboveLeftCorrelator = + projectPulledAboveLeftCorrelator; + this.nullIndicator = nullIndicator; // may be null + this.isCount = ImmutableSet.copyOf(isCount); + this.rexBuilder = rexBuilder; + this.typeFactory = rexBuilder.getTypeFactory(); + } + + private RexNode createCaseExpression( + RexInputRef nullInputRef, + RexLiteral lit, + RexNode rexNode) { + RexNode[] caseOperands = new RexNode[3]; + + // Construct a CASE expression to handle the null indicator. + // + // This also covers the case where a left correlated subquery + // projects fields from outer relation. Since LOJ cannot produce + // nulls on the LHS, the projection now need to make a nullable LHS + // reference using a nullability indicator. If this this indicator + // is null, it means the subquery does not produce any value. As a + // result, any RHS ref by this usbquery needs to produce null value. 
+ + // WHEN indicator IS NULL + caseOperands[0] = + rexBuilder.makeCall( + SqlStdOperatorTable.IS_NULL, + new RexInputRef( + nullInputRef.getIndex(), + typeFactory.createTypeWithNullability( + nullInputRef.getType(), + true))); + + // THEN CAST(NULL AS newInputTypeNullable) + caseOperands[1] = + rexBuilder.makeCast( + typeFactory.createTypeWithNullability( + rexNode.getType(), + true), + lit); + + // ELSE cast (newInput AS newInputTypeNullable) END + caseOperands[2] = + rexBuilder.makeCast( + typeFactory.createTypeWithNullability( + rexNode.getType(), + true), + rexNode); + + return rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + caseOperands); + } + + @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) { + if (cm.mapFieldAccessToCorVar.containsKey(fieldAccess)) { + // if it is a corVar, change it to be input ref. + Correlation corVar = cm.mapFieldAccessToCorVar.get(fieldAccess); + + // corVar offset should point to the leftInput of currentRel, + // which is the Correlator. + RexNode newRexNode = + new RexInputRef(corVar.field, fieldAccess.getType()); + + if (projectPulledAboveLeftCorrelator + && (nullIndicator != null)) { + // need to enforce nullability by applying an additional + // cast operator over the transformed expression. 
+ newRexNode = + createCaseExpression( + nullIndicator, + rexBuilder.constantNull(), + newRexNode); + } + return newRexNode; + } + return fieldAccess; + } + + @Override public RexNode visitInputRef(RexInputRef inputRef) { + if (currentRel instanceof LogicalCorrelate) { + // if this rel references corVar + // and now it needs to be rewritten + // it must have been pulled above the Correlator + // replace the input ref to account for the LHS of the + // Correlator + final int leftInputFieldCount = + ((LogicalCorrelate) currentRel).getLeft().getRowType() + .getFieldCount(); + RelDataType newType = inputRef.getType(); + + if (projectPulledAboveLeftCorrelator) { + newType = + typeFactory.createTypeWithNullability(newType, true); + } + + int pos = inputRef.getIndex(); + RexInputRef newInputRef = + new RexInputRef(leftInputFieldCount + pos, newType); + + if ((isCount != null) && isCount.contains(pos)) { + return createCaseExpression( + newInputRef, + rexBuilder.makeExactLiteral(BigDecimal.ZERO), + newInputRef); + } else { + return newInputRef; + } + } + return inputRef; + } + + @Override public RexNode visitLiteral(RexLiteral literal) { + // Use nullIndicator to decide whether to project null. + // Do nothing if the literal is null. 
+ if (!RexUtil.isNull(literal) + && projectPulledAboveLeftCorrelator + && (nullIndicator != null)) { + return createCaseExpression( + nullIndicator, + rexBuilder.constantNull(), + literal); + } + return literal; + } + + @Override public RexNode visitCall(final RexCall call) { + RexNode newCall; + + boolean[] update = {false}; + List clonedOperands = visitList(call.operands, update); + if (update[0]) { + SqlOperator operator = call.getOperator(); + + boolean isSpecialCast = false; + if (operator instanceof SqlFunction) { + SqlFunction function = (SqlFunction) operator; + if (function.getKind() == SqlKind.CAST) { + if (call.operands.size() < 2) { + isSpecialCast = true; + } + } + } + + final RelDataType newType; + if (!isSpecialCast) { + // TODO: ideally this only needs to be called if the result + // type will also change. However, since that requires + // support from type inference rules to tell whether a rule + // decides return type based on input types, for now all + // operators will be recreated with new type if any operand + // changed, unless the operator has "built-in" type. + newType = rexBuilder.deriveReturnType(operator, clonedOperands); + } else { + // Use the current return type when creating a new call, for + // operators with return type built into the operator + // definition, and with no type inference rules, such as + // cast function with less than 2 operands. + + // TODO: Comments in RexShuttle.visitCall() mention other + // types in this category. Need to resolve those together + // and preferably in the base class RexShuttle. + newType = call.getType(); + } + newCall = + rexBuilder.makeCall( + newType, + operator, + clonedOperands); + } else { + newCall = call; + } + + if (projectPulledAboveLeftCorrelator && (nullIndicator != null)) { + return createCaseExpression( + nullIndicator, + rexBuilder.constantNull(), + newCall); + } + return newCall; + } + } + + /** + * Rule to remove single_value rel. For cases like + * + *

AggRel single_value proj/filter/agg/ join on unique LHS key + * AggRel single group
+ */ + private final class RemoveSingleAggregateRule extends RelOptRule { + public RemoveSingleAggregateRule() { + super( + operand( + LogicalAggregate.class, + operand( + LogicalProject.class, + operand(LogicalAggregate.class, any())))); + } + + public void onMatch(RelOptRuleCall call) { + LogicalAggregate singleAggregate = call.rel(0); + LogicalProject project = call.rel(1); + LogicalAggregate aggregate = call.rel(2); + + // check singleAggRel is single_value agg + if ((!singleAggregate.getGroupSet().isEmpty()) + || (singleAggregate.getAggCallList().size() != 1) + || !(singleAggregate.getAggCallList().get(0).getAggregation() + instanceof SqlSingleValueAggFunction)) { + return; + } + + // check projRel only projects one expression + // check this project only projects one expression, i.e. scalar + // subqueries. + List projExprs = project.getProjects(); + if (projExprs.size() != 1) { + return; + } + + // check the input to projRel is an aggregate on the entire input + if (!aggregate.getGroupSet().isEmpty()) { + return; + } + + // singleAggRel produces a nullable type, so create the new + // projection that casts proj expr to a nullable type. + final RelOptCluster cluster = project.getCluster(); + RelNode newProject = + RelOptUtil.createProject(aggregate, + ImmutableList.of( + rexBuilder.makeCast( + cluster.getTypeFactory().createTypeWithNullability( + projExprs.get(0).getType(), + true), + projExprs.get(0))), + null); + call.transformTo(newProject); + } + } + + /** Planner rule that removes correlations for scalar projects. 
*/ + private final class RemoveCorrelationForScalarProjectRule extends RelOptRule { + public RemoveCorrelationForScalarProjectRule() { + super( + operand(LogicalCorrelate.class, + operand(RelNode.class, any()), + operand(LogicalAggregate.class, + operand(LogicalProject.class, + operand(RelNode.class, any()))))); + } + + public void onMatch(RelOptRuleCall call) { + final LogicalCorrelate correlate = call.rel(0); + final RelNode left = call.rel(1); + final LogicalAggregate aggregate = call.rel(2); + final LogicalProject project = call.rel(3); + RelNode right = call.rel(4); + final RelOptCluster cluster = correlate.getCluster(); + + setCurrent(call.getPlanner().getRoot(), correlate); + + // Check for this pattern. + // The pattern matching could be simplified if rules can be applied + // during decorrelation. + // + // CorrelateRel(left correlation, condition = true) + // LeftInputRel + // LogicalAggregate (groupby (0) single_value()) + // LogicalProject-A (may reference coVar) + // RightInputRel + final JoinRelType joinType = correlate.getJoinType().toJoinType(); + + // corRel.getCondition was here, however Correlate was updated so it + // never includes a join condition. The code was not modified for brevity. + RexNode joinCond = rexBuilder.makeLiteral(true); + if ((joinType != JoinRelType.LEFT) + || (joinCond != rexBuilder.makeLiteral(true))) { + return; + } + + // check that the agg is of the following type: + // doing a single_value() on the entire input + if ((!aggregate.getGroupSet().isEmpty()) + || (aggregate.getAggCallList().size() != 1) + || !(aggregate.getAggCallList().get(0).getAggregation() + instanceof SqlSingleValueAggFunction)) { + return; + } + + // check this project only projects one expression, i.e. scalar + // subqueries. 
+ if (project.getProjects().size() != 1) { + return; + } + + int nullIndicatorPos; + + if ((right instanceof LogicalFilter) + && cm.mapRefRelToCorVar.containsKey(right)) { + // rightInputRel has this shape: + // + // LogicalFilter (references corvar) + // FilterInputRel + + // If rightInputRel is a filter and contains correlated + // reference, make sure the correlated keys in the filter + // condition forms a unique key of the RHS. + + LogicalFilter filter = (LogicalFilter) right; + right = filter.getInput(); + + assert right instanceof HepRelVertex; + right = ((HepRelVertex) right).getCurrentRel(); + + // check filter input contains no correlation + if (RelOptUtil.getVariablesUsed(right).size() > 0) { + return; + } + + // extract the correlation out of the filter + + // First breaking up the filter conditions into equality + // comparisons between rightJoinKeys(from the original + // filterInputRel) and correlatedJoinKeys. correlatedJoinKeys + // can be expressions, while rightJoinKeys need to be input + // refs. These comparisons are AND'ed together. + List tmpRightJoinKeys = Lists.newArrayList(); + List correlatedJoinKeys = Lists.newArrayList(); + RelOptUtil.splitCorrelatedFilterCondition( + filter, + tmpRightJoinKeys, + correlatedJoinKeys, + false); + + // check that the columns referenced in these comparisons form + // an unique key of the filterInputRel + final List rightJoinKeys = new ArrayList<>(); + for (RexNode key : tmpRightJoinKeys) { + assert key instanceof RexInputRef; + rightJoinKeys.add((RexInputRef) key); + } + + // check that the columns referenced in rightJoinKeys form an + // unique key of the filterInputRel + if (rightJoinKeys.isEmpty()) { + return; + } + + // The join filters out the nulls. So, it's ok if there are + // nulls in the join keys. 
+ final RelMetadataQuery mq = RelMetadataQuery.instance(); + if (!RelMdUtil.areColumnsDefinitelyUniqueWhenNullsFiltered(mq, right, + rightJoinKeys)) { + //SQL2REL_LOGGER.fine(rightJoinKeys.toString() + // + "are not unique keys for " + // + right.toString()); + return; + } + + RexUtil.FieldAccessFinder visitor = + new RexUtil.FieldAccessFinder(); + RexUtil.apply(visitor, correlatedJoinKeys, null); + List correlatedKeyList = + visitor.getFieldAccessList(); + + if (!checkCorVars(correlate, project, filter, correlatedKeyList)) { + return; + } + + // Change the plan to this structure. + // Note that the aggregateRel is removed. + // + // LogicalProject-A' (replace corvar to input ref from the LogicalJoin) + // LogicalJoin (replace corvar to input ref from LeftInputRel) + // LeftInputRel + // RightInputRel(oreviously FilterInputRel) + + // Change the filter condition into a join condition + joinCond = + removeCorrelationExpr(filter.getCondition(), false); + + nullIndicatorPos = + left.getRowType().getFieldCount() + + rightJoinKeys.get(0).getIndex(); + } else if (cm.mapRefRelToCorVar.containsKey(project)) { + // check filter input contains no correlation + if (RelOptUtil.getVariablesUsed(right).size() > 0) { + return; + } + + if (!checkCorVars(correlate, project, null, null)) { + return; + } + + // Change the plan to this structure. + // + // LogicalProject-A' (replace corvar to input ref from LogicalJoin) + // LogicalJoin (left, condition = true) + // LeftInputRel + // LogicalAggregate(groupby(0), single_value(0), s_v(1)....) 
+ // LogicalProject-B (everything from input plus literal true) + // ProjInputRel + + // make the new projRel to provide a null indicator + right = + createProjectWithAdditionalExprs(right, + ImmutableList.of( + Pair.of( + rexBuilder.makeLiteral(true), "nullIndicator"))); + + // make the new aggRel + right = + RelOptUtil.createSingleValueAggRel(cluster, right); + + // The last field: + // single_value(true) + // is the nullIndicator + nullIndicatorPos = + left.getRowType().getFieldCount() + + right.getRowType().getFieldCount() - 1; + } else { + return; + } + + // make the new join rel + LogicalJoin join = + LogicalJoin.create(left, right, joinCond, + ImmutableSet.of(), joinType); + + RelNode newProject = + projectJoinOutputWithNullability(join, project, nullIndicatorPos); + + call.transformTo(newProject); + + removeCorVarFromTree(correlate); + } + } + + /** Planner rule that removes correlations for scalar aggregates. */ + private final class RemoveCorrelationForScalarAggregateRule + extends RelOptRule { + public RemoveCorrelationForScalarAggregateRule() { + super( + operand(LogicalCorrelate.class, + operand(RelNode.class, any()), + operand(LogicalProject.class, + operand(LogicalAggregate.class, null, Aggregate.IS_SIMPLE, + operand(LogicalProject.class, + operand(RelNode.class, any())))))); + } + + public void onMatch(RelOptRuleCall call) { + final LogicalCorrelate correlate = call.rel(0); + final RelNode left = call.rel(1); + final LogicalProject aggOutputProject = call.rel(2); + final LogicalAggregate aggregate = call.rel(3); + final LogicalProject aggInputProject = call.rel(4); + RelNode right = call.rel(5); + final RelOptCluster cluster = correlate.getCluster(); + + setCurrent(call.getPlanner().getRoot(), correlate); + + // check for this pattern + // The pattern matching could be simplified if rules can be applied + // during decorrelation, + // + // CorrelateRel(left correlation, condition = true) + // LeftInputRel + // LogicalProject-A (a RexNode) + // 
LogicalAggregate (groupby (0), agg0(), agg1()...) + // LogicalProject-B (references coVar) + // rightInputRel + + // check aggOutputProject projects only one expression + final List aggOutputProjects = aggOutputProject.getProjects(); + if (aggOutputProjects.size() != 1) { + return; + } + + final JoinRelType joinType = correlate.getJoinType().toJoinType(); + // corRel.getCondition was here, however Correlate was updated so it + // never includes a join condition. The code was not modified for brevity. + RexNode joinCond = rexBuilder.makeLiteral(true); + if ((joinType != JoinRelType.LEFT) + || (joinCond != rexBuilder.makeLiteral(true))) { + return; + } + + // check that the agg is on the entire input + if (!aggregate.getGroupSet().isEmpty()) { + return; + } + + final List aggInputProjects = aggInputProject.getProjects(); + + final List aggCalls = aggregate.getAggCallList(); + final Set isCountStar = Sets.newHashSet(); + + // mark if agg produces count(*) which needs to reference the + // nullIndicator after the transformation. + int k = -1; + for (AggregateCall aggCall : aggCalls) { + ++k; + if ((aggCall.getAggregation() instanceof SqlCountAggFunction) + && (aggCall.getArgList().size() == 0)) { + isCountStar.add(k); + } + } + + if ((right instanceof LogicalFilter) + && cm.mapRefRelToCorVar.containsKey(right)) { + // rightInputRel has this shape: + // + // LogicalFilter (references corvar) + // FilterInputRel + LogicalFilter filter = (LogicalFilter) right; + right = filter.getInput(); + + assert right instanceof HepRelVertex; + right = ((HepRelVertex) right).getCurrentRel(); + + // check filter input contains no correlation + if (RelOptUtil.getVariablesUsed(right).size() > 0) { + return; + } + + // check filter condition type First extract the correlation out + // of the filter + + // First breaking up the filter conditions into equality + // comparisons between rightJoinKeys(from the original + // filterInputRel) and correlatedJoinKeys. 
correlatedJoinKeys + // can only be RexFieldAccess, while rightJoinKeys can be + // expressions. These comparisons are AND'ed together. + List rightJoinKeys = Lists.newArrayList(); + List tmpCorrelatedJoinKeys = Lists.newArrayList(); + RelOptUtil.splitCorrelatedFilterCondition( + filter, + rightJoinKeys, + tmpCorrelatedJoinKeys, + true); + + // make sure the correlated reference forms a unique key check + // that the columns referenced in these comparisons form an + // unique key of the leftInputRel + List correlatedJoinKeys = Lists.newArrayList(); + List correlatedInputRefJoinKeys = Lists.newArrayList(); + for (RexNode joinKey : tmpCorrelatedJoinKeys) { + assert joinKey instanceof RexFieldAccess; + correlatedJoinKeys.add((RexFieldAccess) joinKey); + RexNode correlatedInputRef = + removeCorrelationExpr(joinKey, false); + assert correlatedInputRef instanceof RexInputRef; + correlatedInputRefJoinKeys.add( + (RexInputRef) correlatedInputRef); + } + + // check that the columns referenced in rightJoinKeys form an + // unique key of the filterInputRel + if (correlatedInputRefJoinKeys.isEmpty()) { + return; + } + + // The join filters out the nulls. So, it's ok if there are + // nulls in the join keys. + final RelMetadataQuery mq = RelMetadataQuery.instance(); + if (!RelMdUtil.areColumnsDefinitelyUniqueWhenNullsFiltered(mq, left, + correlatedInputRefJoinKeys)) { + //SQL2REL_LOGGER.fine(correlatedJoinKeys.toString() + // + "are not unique keys for " + // + left.toString()); + return; + } + + // check cor var references are valid + if (!checkCorVars(correlate, + aggInputProject, + filter, + correlatedJoinKeys)) { + return; + } + + // Rewrite the above plan: + // + // CorrelateRel(left correlation, condition = true) + // LeftInputRel + // LogicalProject-A (a RexNode) + // LogicalAggregate (groupby(0), agg0(),agg1()...) 
+ // LogicalProject-B (may reference coVar) + // LogicalFilter (references corVar) + // RightInputRel (no correlated reference) + // + + // to this plan: + // + // LogicalProject-A' (all gby keys + rewritten nullable ProjExpr) + // LogicalAggregate (groupby(all left input refs) + // agg0(rewritten expression), + // agg1()...) + // LogicalProject-B' (rewriten original projected exprs) + // LogicalJoin(replace corvar w/ input ref from LeftInputRel) + // LeftInputRel + // RightInputRel + // + + // In the case where agg is count(*) or count($corVar), it is + // changed to count(nullIndicator). + // Note: any non-nullable field from the RHS can be used as + // the indicator however a "true" field is added to the + // projection list from the RHS for simplicity to avoid + // searching for non-null fields. + // + // LogicalProject-A' (all gby keys + rewritten nullable ProjExpr) + // LogicalAggregate (groupby(all left input refs), + // count(nullIndicator), other aggs...) + // LogicalProject-B' (all left input refs plus + // the rewritten original projected exprs) + // LogicalJoin(replace corvar to input ref from LeftInputRel) + // LeftInputRel + // LogicalProject (everything from RightInputRel plus + // the nullIndicator "true") + // RightInputRel + // + + // first change the filter condition into a join condition + joinCond = + removeCorrelationExpr(filter.getCondition(), false); + } else if (cm.mapRefRelToCorVar.containsKey(aggInputProject)) { + // check rightInputRel contains no correlation + if (RelOptUtil.getVariablesUsed(right).size() > 0) { + return; + } + + // check cor var references are valid + if (!checkCorVars(correlate, aggInputProject, null, null)) { + return; + } + + int nFields = left.getRowType().getFieldCount(); + ImmutableBitSet allCols = ImmutableBitSet.range(nFields); + + // leftInputRel contains unique keys + // i.e. 
each row is distinct and can group by on all the left + // fields + final RelMetadataQuery mq = RelMetadataQuery.instance(); + if (!RelMdUtil.areColumnsDefinitelyUnique(mq, left, allCols)) { + //SQL2REL_LOGGER.fine("There are no unique keys for " + left); + return; + } + // + // Rewrite the above plan: + // + // CorrelateRel(left correlation, condition = true) + // LeftInputRel + // LogicalProject-A (a RexNode) + // LogicalAggregate (groupby(0), agg0(), agg1()...) + // LogicalProject-B (references coVar) + // RightInputRel (no correlated reference) + // + + // to this plan: + // + // LogicalProject-A' (all gby keys + rewritten nullable ProjExpr) + // LogicalAggregate (groupby(all left input refs) + // agg0(rewritten expression), + // agg1()...) + // LogicalProject-B' (rewriten original projected exprs) + // LogicalJoin (LOJ cond = true) + // LeftInputRel + // RightInputRel + // + + // In the case where agg is count($corVar), it is changed to + // count(nullIndicator). + // Note: any non-nullable field from the RHS can be used as + // the indicator however a "true" field is added to the + // projection list from the RHS for simplicity to avoid + // searching for non-null fields. + // + // LogicalProject-A' (all gby keys + rewritten nullable ProjExpr) + // LogicalAggregate (groupby(all left input refs), + // count(nullIndicator), other aggs...) 
+ // LogicalProject-B' (all left input refs plus + // the rewritten original projected exprs) + // LogicalJoin(replace corvar to input ref from LeftInputRel) + // LeftInputRel + // LogicalProject (everything from RightInputRel plus + // the nullIndicator "true") + // RightInputRel + } else { + return; + } + + RelDataType leftInputFieldType = left.getRowType(); + int leftInputFieldCount = leftInputFieldType.getFieldCount(); + int joinOutputProjExprCount = + leftInputFieldCount + aggInputProjects.size() + 1; + + right = + createProjectWithAdditionalExprs(right, + ImmutableList.of( + Pair.of(rexBuilder.makeLiteral(true), + "nullIndicator"))); + + LogicalJoin join = + LogicalJoin.create(left, right, joinCond, + ImmutableSet.of(), joinType); + + // To the consumer of joinOutputProjRel, nullIndicator is located + // at the end + int nullIndicatorPos = join.getRowType().getFieldCount() - 1; + + RexInputRef nullIndicator = + new RexInputRef( + nullIndicatorPos, + cluster.getTypeFactory().createTypeWithNullability( + join.getRowType().getFieldList() + .get(nullIndicatorPos).getType(), + true)); + + // first project all group-by keys plus the transformed agg input + List joinOutputProjects = Lists.newArrayList(); + + // LOJ Join preserves LHS types + for (int i = 0; i < leftInputFieldCount; i++) { + joinOutputProjects.add( + rexBuilder.makeInputRef( + leftInputFieldType.getFieldList().get(i).getType(), i)); + } + + for (RexNode aggInputProjExpr : aggInputProjects) { + joinOutputProjects.add( + removeCorrelationExpr(aggInputProjExpr, + joinType.generatesNullsOnRight(), + nullIndicator)); + } + + joinOutputProjects.add( + rexBuilder.makeInputRef(join, nullIndicatorPos)); + + RelNode joinOutputProject = + RelOptUtil.createProject( + join, + joinOutputProjects, + null); + + // nullIndicator is now at a different location in the output of + // the join + nullIndicatorPos = joinOutputProjExprCount - 1; + + final int groupCount = leftInputFieldCount; + + List newAggCalls = 
Lists.newArrayList(); + k = -1; + for (AggregateCall aggCall : aggCalls) { + ++k; + final List argList; + + if (isCountStar.contains(k)) { + // this is a count(*), transform it to count(nullIndicator) + // the null indicator is located at the end + argList = Collections.singletonList(nullIndicatorPos); + } else { + argList = Lists.newArrayList(); + + for (int aggArg : aggCall.getArgList()) { + argList.add(aggArg + groupCount); + } + } + + int filterArg = aggCall.filterArg < 0 ? aggCall.filterArg + : aggCall.filterArg + groupCount; + newAggCalls.add( + aggCall.adaptTo(joinOutputProject, argList, filterArg, + aggregate.getGroupCount(), groupCount)); + } + + ImmutableBitSet groupSet = + ImmutableBitSet.range(groupCount); + LogicalAggregate newAggregate = + LogicalAggregate.create(joinOutputProject, + false, + groupSet, + null, + newAggCalls); + + List newAggOutputProjectList = Lists.newArrayList(); + for (int i : groupSet) { + newAggOutputProjectList.add( + rexBuilder.makeInputRef(newAggregate, i)); + } + + RexNode newAggOutputProjects = + removeCorrelationExpr(aggOutputProjects.get(0), false); + newAggOutputProjectList.add( + rexBuilder.makeCast( + cluster.getTypeFactory().createTypeWithNullability( + newAggOutputProjects.getType(), + true), + newAggOutputProjects)); + + RelNode newAggOutputProject = + RelOptUtil.createProject( + newAggregate, + newAggOutputProjectList, + null); + + call.transformTo(newAggOutputProject); + + removeCorVarFromTree(correlate); + } + } + + // REVIEW jhyde 29-Oct-2007: This rule is non-static, depends on the state + // of members in RelDecorrelator, and has side-effects in the decorrelator. + // This breaks the contract of a planner rule, and the rule will not be + // reusable in other planners. + + // REVIEW jvs 29-Oct-2007: Shouldn't it also be incorporating + // the flavor attribute into the description? + + /** Planner rule that adjusts projects when counts are added. 
*/ + private final class AdjustProjectForCountAggregateRule extends RelOptRule { + final boolean flavor; + + public AdjustProjectForCountAggregateRule(boolean flavor) { + super( + flavor + ? operand(LogicalCorrelate.class, + operand(RelNode.class, any()), + operand(LogicalProject.class, + operand(LogicalAggregate.class, any()))) + : operand(LogicalCorrelate.class, + operand(RelNode.class, any()), + operand(LogicalAggregate.class, any()))); + this.flavor = flavor; + } + + public void onMatch(RelOptRuleCall call) { + final LogicalCorrelate correlate = call.rel(0); + final RelNode left = call.rel(1); + final LogicalProject aggOutputProject; + final LogicalAggregate aggregate; + if (flavor) { + aggOutputProject = call.rel(2); + aggregate = call.rel(3); + } else { + aggregate = call.rel(2); + + // Create identity projection + final List> projects = Lists.newArrayList(); + final List fields = + aggregate.getRowType().getFieldList(); + for (int i = 0; i < fields.size(); i++) { + projects.add(RexInputRef.of2(projects.size(), fields)); + } + aggOutputProject = + (LogicalProject) RelOptUtil.createProject( + aggregate, + projects, + false); + } + onMatch2(call, correlate, left, aggOutputProject, aggregate); + } + + private void onMatch2( + RelOptRuleCall call, + LogicalCorrelate correlate, + RelNode leftInput, + LogicalProject aggOutputProject, + LogicalAggregate aggregate) { + if (generatedCorRels.contains(correlate)) { + // This correlator was generated by a previous invocation of + // this rule. No further work to do. + return; + } + + setCurrent(call.getPlanner().getRoot(), correlate); + + // check for this pattern + // The pattern matching could be simplified if rules can be applied + // during decorrelation, + // + // CorrelateRel(left correlation, condition = true) + // LeftInputRel + // LogicalProject-A (a RexNode) + // LogicalAggregate (groupby (0), agg0(), agg1()...) 
+ + // check aggOutputProj projects only one expression + List aggOutputProjExprs = aggOutputProject.getProjects(); + if (aggOutputProjExprs.size() != 1) { + return; + } + + JoinRelType joinType = correlate.getJoinType().toJoinType(); + // corRel.getCondition was here, however Correlate was updated so it + // never includes a join condition. The code was not modified for brevity. + RexNode joinCond = rexBuilder.makeLiteral(true); + if ((joinType != JoinRelType.LEFT) + || (joinCond != rexBuilder.makeLiteral(true))) { + return; + } + + // check that the agg is on the entire input + if (!aggregate.getGroupSet().isEmpty()) { + return; + } + + List aggCalls = aggregate.getAggCallList(); + Set isCount = Sets.newHashSet(); + + // remember the count() positions + int i = -1; + for (AggregateCall aggCall : aggCalls) { + ++i; + if (aggCall.getAggregation() instanceof SqlCountAggFunction) { + isCount.add(i); + } + } + + // now rewrite the plan to + // + // Project-A' (all LHS plus transformed original projections, + // replacing references to count() with case statement) + // Correlator(left correlation, condition = true) + // LeftInputRel + // LogicalAggregate (groupby (0), agg0(), agg1()...) + // + LogicalCorrelate newCorrelate = + LogicalCorrelate.create(leftInput, aggregate, + correlate.getCorrelationId(), correlate.getRequiredColumns(), + correlate.getJoinType()); + + // remember this rel so we don't fire rule on it again + // REVIEW jhyde 29-Oct-2007: rules should not save state; rule + // should recognize patterns where it does or does not need to do + // work + generatedCorRels.add(newCorrelate); + + // need to update the mapCorVarToCorRel Update the output position + // for the cor vars: only pass on the cor vars that are not used in + // the join key. 
+ if (cm.mapCorVarToCorRel.get(correlate.getCorrelationId()) == correlate) { + cm.mapCorVarToCorRel.put(correlate.getCorrelationId(), newCorrelate); + } + + RelNode newOutput = + aggregateCorrelatorOutput(newCorrelate, aggOutputProject, isCount); + + call.transformTo(newOutput); + } + } + + /** + * {@code Correlation} here represents a unique reference to a correlation + * field. + * For instance, if a RelNode references emp.name multiple times, it would + * result in multiple {@code Correlation} objects that differ just in + * {@link Correlation#uniqueKey}. + */ + static class Correlation + implements Comparable { + public final int uniqueKey; + public final CorrelationId corr; + public final int field; + + Correlation(CorrelationId corr, int field, int uniqueKey) { + this.corr = corr; + this.field = field; + this.uniqueKey = uniqueKey; + } + + public int compareTo(Correlation o) { + int c = corr.compareTo(o.corr); + if (c != 0) { + return c; + } + c = Integer.compare(field, o.field); + if (c != 0) { + return c; + } + return Integer.compare(uniqueKey, o.uniqueKey); + } + } + + /** A map of the locations of + * {@link org.apache.calcite.rel.logical.LogicalCorrelate} + * in a tree of {@link RelNode}s. + * + *

It is used to drive the decorrelation process. + * Treat it as immutable; rebuild if you modify the tree. + * + *

There are three maps:

    + * + *
  1. mapRefRelToCorVar maps a rel node to the correlated variables it + * references; + * + *
  2. mapCorVarToCorRel maps a correlated variable to the correlatorRel + * providing it; + * + *
  3. mapFieldAccessToCorVar maps a rex field access to + * the cor var it represents. Because typeFlattener does not clone or + * modify a correlated field access this map does not need to be + * updated. + * + *
*/ + private static class CorelMap { + private final Multimap mapRefRelToCorVar; + private final SortedMap mapCorVarToCorRel; + private final Map mapFieldAccessToCorVar; + + // TODO: create immutable copies of all maps + private CorelMap(Multimap mapRefRelToCorVar, + SortedMap mapCorVarToCorRel, + Map mapFieldAccessToCorVar) { + this.mapRefRelToCorVar = mapRefRelToCorVar; + this.mapCorVarToCorRel = mapCorVarToCorRel; + this.mapFieldAccessToCorVar = ImmutableMap.copyOf(mapFieldAccessToCorVar); + } + + @Override public String toString() { + return "mapRefRelToCorVar=" + mapRefRelToCorVar + + "\nmapCorVarToCorRel=" + mapCorVarToCorRel + + "\nmapFieldAccessToCorVar=" + mapFieldAccessToCorVar + + "\n"; + } + + @Override public boolean equals(Object obj) { + return obj == this + || obj instanceof CorelMap + && mapRefRelToCorVar.equals(((CorelMap) obj).mapRefRelToCorVar) + && mapCorVarToCorRel.equals(((CorelMap) obj).mapCorVarToCorRel) + && mapFieldAccessToCorVar.equals( + ((CorelMap) obj).mapFieldAccessToCorVar); + } + + @Override public int hashCode() { + return com.google.common.base.Objects.hashCode(mapRefRelToCorVar, + mapCorVarToCorRel, + mapFieldAccessToCorVar); + } + + /** Creates a CorelMap with given contents. */ + public static CorelMap of( + SortedSetMultimap mapRefRelToCorVar, + SortedMap mapCorVarToCorRel, + Map mapFieldAccessToCorVar) { + return new CorelMap(mapRefRelToCorVar, mapCorVarToCorRel, + mapFieldAccessToCorVar); + } + + /** + * Returns whether there are any correlating variables in this statement. + * + * @return whether there are any correlating variables + */ + public boolean hasCorrelation() { + return !mapCorVarToCorRel.isEmpty(); + } + } + + /** Builds a {@link org.apache.calcite.sql2rel.RelDecorrelator.CorelMap}. 
*/ + private static class CorelMapBuilder extends HiveRelShuttleImpl { + final SortedMap mapCorVarToCorRel = + new TreeMap<>(); + + final SortedSetMultimap mapRefRelToCorVar = + Multimaps.newSortedSetMultimap( + Maps.>newHashMap(), + new Supplier>() { + public TreeSet get() { + Bug.upgrade("use MultimapBuilder when we're on Guava-16"); + return Sets.newTreeSet(); + } + }); + + final Map mapFieldAccessToCorVar = + new HashMap<>(); + + final Holder offset = Holder.of(0); + int corrIdGenerator = 0; + + final List stack = new ArrayList<>(); + + /** Creates a CorelMap by iterating over a {@link RelNode} tree. */ + CorelMap build(RelNode rel) { + stripHep(rel).accept(this); + return new CorelMap(mapRefRelToCorVar, mapCorVarToCorRel, + mapFieldAccessToCorVar); + } + + @Override public RelNode visit(LogicalJoin join) { + try { + Stacks.push(stack, join); + join.getCondition().accept(rexVisitor(join)); + } finally { + Stacks.pop(stack, join); + } + return visitJoin(join); + } + + public RelNode visit(HiveJoin join) { + try { + Stacks.push(stack, join); + join.getCondition().accept(rexVisitor(join)); + } finally { + Stacks.pop(stack, join); + } + return visitJoin(join); + } + + @Override protected RelNode visitChild(RelNode parent, int i, + RelNode input) { + return super.visitChild(parent, i, stripHep(input)); + } + + @Override public RelNode visit(LogicalCorrelate correlate) { + mapCorVarToCorRel.put(correlate.getCorrelationId(), correlate); + return visitJoin(correlate); + } + + private RelNode visitJoin(BiRel join) { + final int x = offset.get(); + visitChild(join, 0, join.getLeft()); + offset.set(x + join.getLeft().getRowType().getFieldCount()); + visitChild(join, 1, join.getRight()); + offset.set(x); + return join; + } + + public RelNode visit(final HiveProject project) { + try { + Stacks.push(stack, project); + for (RexNode node : project.getProjects()) { + node.accept(rexVisitor(project)); + } + } finally { + Stacks.pop(stack, project); + } + return 
super.visit(project); + } + public RelNode visit(final HiveFilter filter) { + try { + Stacks.push(stack, filter); + filter.getCondition().accept(rexVisitor(filter)); + } finally { + Stacks.pop(stack, filter); + } + return super.visit(filter); + } + @Override public RelNode visit(final LogicalFilter filter) { + try { + Stacks.push(stack, filter); + filter.getCondition().accept(rexVisitor(filter)); + } finally { + Stacks.pop(stack, filter); + } + return super.visit(filter); + } + + @Override public RelNode visit(LogicalProject project) { + try { + Stacks.push(stack, project); + for (RexNode node : project.getProjects()) { + node.accept(rexVisitor(project)); + } + } finally { + Stacks.pop(stack, project); + } + return super.visit(project); + } + + private RexVisitorImpl rexVisitor(final RelNode rel) { + return new RexVisitorImpl(true) { + @Override public Void visitFieldAccess(RexFieldAccess fieldAccess) { + final RexNode ref = fieldAccess.getReferenceExpr(); + if (ref instanceof RexCorrelVariable) { + final RexCorrelVariable var = (RexCorrelVariable) ref; + if(mapFieldAccessToCorVar.containsKey(fieldAccess)) + { + //for cases where different Rel nodes are referring to + // same correlation var (e.g. 
in case of NOT IN) + // avoid generating another correlation var + // and record the 'rel' is using the same correlation + mapRefRelToCorVar.put(rel, mapFieldAccessToCorVar.get(fieldAccess)); + } + else { + final Correlation correlation = + new Correlation(var.id, + fieldAccess.getField().getIndex(), + corrIdGenerator++); + mapFieldAccessToCorVar.put(fieldAccess, correlation); + mapRefRelToCorVar.put(rel, correlation); + } + } + return super.visitFieldAccess(fieldAccess); + } + + @Override public Void visitSubQuery(RexSubQuery subQuery) { + subQuery.rel.accept(CorelMapBuilder.this); + return super.visitSubQuery(subQuery); + } + }; + } + } + + /** Frame describing the relational expression after decorrelation + * and where to find the output fields and correlation variables + * among its output fields. */ + static class Frame { + final RelNode r; + final ImmutableSortedMap corVarOutputPos; + final ImmutableMap oldToNewOutputPos; + + Frame(RelNode r, SortedMap corVarOutputPos, + Map oldToNewOutputPos) { + this.r = Preconditions.checkNotNull(r); + this.corVarOutputPos = ImmutableSortedMap.copyOf(corVarOutputPos); + this.oldToNewOutputPos = ImmutableSortedMap.copyOf(oldToNewOutputPos); + } + } +} + +// End RelDecorrelator.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java new file mode 100644 index 0000000..577af6d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.CorrelationId; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.LogicVisitor; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql2rel.RelDecorrelator; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveReplicatedRelBuilder; + +/** + * NOTE: this rule is replicated from Calcite's SubqueryRemoveRule + * Transform that converts IN, EXISTS and scalar 
sub-queries into joins. + * TODO: + * The reason this rule is replicated instead of using Calcite's is that + * Calcite creates a null literal with a null type, but Hive needs it to be properly typed. + * Needs the fix for CALCITE-1493. + * + *

Sub-queries are represented by {@link RexSubQuery} expressions. + * + *

A sub-query may or may not be correlated. If a sub-query is correlated, + * the wrapped {@link RelNode} will contain a {@link RexCorrelVariable} before + * the rewrite, and the product of the rewrite will be a {@link Correlate}. + * The Correlate can be removed using {@link RelDecorrelator}. + */ +public abstract class HiveSubQueryRemoveRule extends RelOptRule{ + + public static final HiveSubQueryRemoveRule FILTER = + new HiveSubQueryRemoveRule( + operand(Filter.class, null, RexUtil.SubQueryFinder.FILTER_PREDICATE, + any()), + RelFactories.LOGICAL_BUILDER, "SubQueryRemoveRule:Filter") { + public void onMatch(RelOptRuleCall call) { + final Filter filter = call.rel(0); + //final RelBuilder builder = call.builder(); + final HiveReplicatedRelBuilder builder = new HiveReplicatedRelBuilder(null, call.rel(0).getCluster(), null); + final RexSubQuery e = + RexUtil.SubQueryFinder.find(filter.getCondition()); + assert e != null; + final RelOptUtil.Logic logic = + LogicVisitor.find(RelOptUtil.Logic.TRUE, + ImmutableList.of(filter.getCondition()), e); + builder.push(filter.getInput()); + final int fieldCount = builder.peek().getRowType().getFieldCount(); + final RexNode target = apply(e, filter.getVariablesSet(), logic, + builder, 1, fieldCount); + final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); + builder.filter(shuttle.apply(filter.getCondition())); + builder.project(fields(builder, filter.getRowType().getFieldCount())); + call.transformTo(builder.build()); + } + }; + + private HiveSubQueryRemoveRule(RelOptRuleOperand operand, + RelBuilderFactory relBuilderFactory, + String description) { + super(operand, relBuilderFactory, description); + } + + protected RexNode apply(RexSubQuery e, Set variablesSet, + RelOptUtil.Logic logic, + HiveReplicatedRelBuilder builder, int inputCount, int offset) { + switch (e.getKind()) { + case SCALAR_QUERY: + builder.push(e.rel); + final RelMetadataQuery mq = RelMetadataQuery.instance(); + final Boolean unique = 
mq.areColumnsUnique(builder.peek(), + ImmutableBitSet.of()); + if (unique == null || !unique) { + builder.aggregate(builder.groupKey(), + builder.aggregateCall(SqlStdOperatorTable.SINGLE_VALUE, false, null, + null, builder.field(0))); + } + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + return field(builder, inputCount, offset); + + case IN: + case EXISTS: + // Most general case, where the left and right keys might have nulls, and + // caller requires 3-valued logic return. + // + // select e.deptno, e.deptno in (select deptno from emp) + // + // becomes + // + // select e.deptno, + // case + // when ct.c = 0 then false + // when dt.i is not null then true + // when e.deptno is null then null + // when ct.ck < ct.c then null + // else false + // end + // from e + // left join ( + // (select count(*) as c, count(deptno) as ck from emp) as ct + // cross join (select distinct deptno, true as i from emp)) as dt + // on e.deptno = dt.deptno + // + // If keys are not null we can remove "ct" and simplify to + // + // select e.deptno, + // case + // when dt.i is not null then true + // else false + // end + // from e + // left join (select distinct deptno, true as i from emp) as dt + // on e.deptno = dt.deptno + // + // We could further simplify to + // + // select e.deptno, + // dt.i is not null + // from e + // left join (select distinct deptno, true as i from emp) as dt + // on e.deptno = dt.deptno + // + // but have not yet. + // + // If the logic is TRUE we can just kill the record if the condition + // evaluates to FALSE or UNKNOWN. 
Thus the query simplifies to an inner + // join: + // + // select e.deptno, + // true + // from e + // inner join (select distinct deptno from emp) as dt + // on e.deptno = dt.deptno + // + + builder.push(e.rel); + final List fields = new ArrayList<>(); + switch (e.getKind()) { + case IN: + fields.addAll(builder.fields()); + } + + // First, the cross join + switch (logic) { + case TRUE_FALSE_UNKNOWN: + case UNKNOWN_AS_TRUE: + builder.aggregate(builder.groupKey(), + builder.count(false, "c"), + builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck", + builder.fields())); + builder.as("ct"); + if( !variablesSet.isEmpty()) + { + //builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + } + else + builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); + + offset += 2; + builder.push(e.rel); + break; + } + + // Now the left join + switch (logic) { + case TRUE: + if (fields.isEmpty()) { + builder.project(builder.alias(builder.literal(true), "i")); + builder.aggregate(builder.groupKey(0)); + } else { + builder.aggregate(builder.groupKey(fields)); + } + break; + default: + fields.add(builder.alias(builder.literal(true), "i")); + builder.project(fields); + builder.distinct(); + } + builder.as("dt"); + final List conditions = new ArrayList<>(); + for (Pair pair + : Pair.zip(e.getOperands(), builder.fields())) { + conditions.add( + builder.equals(pair.left, RexUtil.shift(pair.right, offset))); + } + switch (logic) { + case TRUE: + builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet); + return builder.literal(true); + } + builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet); + + final List keyIsNulls = new ArrayList<>(); + for (RexNode operand : e.getOperands()) { + if (operand.getType().isNullable()) { + keyIsNulls.add(builder.isNull(operand)); + } + } + final ImmutableList.Builder operands = ImmutableList.builder(); + switch 
(logic) { + case TRUE_FALSE_UNKNOWN: + case UNKNOWN_AS_TRUE: + operands.add( + builder.equals(builder.field("ct", "c"), builder.literal(0)), + builder.literal(false)); + //now that we are using LEFT OUTER JOIN to join inner count, count(*) + // with outer table, we wouldn't be able to tell if count is zero + // for inner table since inner join with correlated values will get rid + // of all values where join cond is not true (i.e where actual inner table + // will produce zero result). To handle this case we need to check both + // count is zero or count is null + operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false)); + break; + } + operands.add(builder.isNotNull(builder.field("dt", "i")), + builder.literal(true)); + if (!keyIsNulls.isEmpty()) { + //Calcite creates null literal with Null type here but because HIVE doesn't support null type + // it is appropriately typed boolean + operands.add(builder.or(keyIsNulls), e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN)); + // we are creating filter here so should not be returning NULL. Not sure why Calcite return NULL + //operands.add(builder.or(keyIsNulls), builder.literal(false)); + } + Boolean b = true; + switch (logic) { + case TRUE_FALSE_UNKNOWN: + b = null; + // fall through + case UNKNOWN_AS_TRUE: + operands.add( + builder.call(SqlStdOperatorTable.LESS_THAN, + builder.field("ct", "ck"), builder.field("ct", "c")), + builder.literal(b)); + break; + } + operands.add(builder.literal(false)); + return builder.call(SqlStdOperatorTable.CASE, operands.build()); + + default: + throw new AssertionError(e.getKind()); + } + } + + /** Returns a reference to a particular field, by offset, across several + * inputs on a {@link RelBuilder}'s stack. 
*/ + private RexInputRef field(HiveReplicatedRelBuilder builder, int inputCount, int offset) { + for (int inputOrdinal = 0;;) { + final RelNode r = builder.peek(inputCount, inputOrdinal); + if (offset < r.getRowType().getFieldCount()) { + return builder.field(inputCount, inputOrdinal, offset); + } + ++inputOrdinal; + offset -= r.getRowType().getFieldCount(); + } + } + + /** Returns a list of expressions that project the first {@code fieldCount} + * fields of the top input on a {@link RelBuilder}'s stack. */ + private static List fields(HiveReplicatedRelBuilder builder, int fieldCount) { + final List projects = new ArrayList<>(); + for (int i = 0; i < fieldCount; i++) { + projects.add(builder.field(i)); + } + return projects; + } + + /** Shuttle that replaces occurrences of a given + * {@link org.apache.calcite.rex.RexSubQuery} with a replacement + * expression. */ + private static class ReplaceSubQueryShuttle extends RexShuttle { + private final RexSubQuery subQuery; + private final RexNode replacement; + + public ReplaceSubQueryShuttle(RexSubQuery subQuery, RexNode replacement) { + this.subQuery = subQuery; + this.replacement = replacement; + } + + @Override public RexNode visitSubQuery(RexSubQuery subQuery) { + return RexUtil.eq(subQuery, this.subQuery) ? 
replacement : subQuery; + } + } +} + +// End SubQueryRemoveRule.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java index d494c9f..b8a5642 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java @@ -266,6 +266,12 @@ OpAttr visit(HiveTableScan scanRel) { TableScanOperator ts = (TableScanOperator) OperatorFactory.get( semanticAnalyzer.getOpContext(), tsd, new RowSchema(colInfos)); + //now that we let Calcite process subqueries we might have more than one + // tablescan with same alias. + if(topOps.get(tableAlias) != null) + { + tableAlias = tableAlias + this.uniqueCounter ; + } topOps.put(tableAlias, ts); if (LOG.isDebugEnabled()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index f8fb475..60966a3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -32,12 +32,14 @@ import org.apache.calcite.avatica.util.TimeUnitRange; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlKind; @@ -54,6 
+56,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; @@ -68,6 +71,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; @@ -116,23 +120,42 @@ private InputCtx(RelDataType calciteInpDataType, ImmutableMap h private final RelOptCluster cluster; private final ImmutableList inputCtxs; private final boolean flattenExpr; + private final RowResolver outerRR; //outerRR belongs to outer query and is required to resolve correlated references + private final ImmutableMap outerNameToPosMap; + private int correlatedId; //Constructor used by HiveRexExecutorImpl public RexNodeConverter(RelOptCluster cluster) { this(cluster, new ArrayList(), false); } + //subqueries will need outer query's row resolver + public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, + ImmutableMap outerNameToPosMap, + ImmutableMap nameToPosMap, RowResolver hiveRR, RowResolver outerRR, int offset, boolean flattenExpr, int correlatedId) { + this.cluster = cluster; + this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, hiveRR , offset)); + this.flattenExpr = flattenExpr; + this.outerRR = outerRR; + this.outerNameToPosMap = outerNameToPosMap; + this.correlatedId = correlatedId; + } + public 
RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, ImmutableMap nameToPosMap, int offset, boolean flattenExpr) { this.cluster = cluster; this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, null, offset)); this.flattenExpr = flattenExpr; + this.outerRR = null; + this.outerNameToPosMap = null; } public RexNodeConverter(RelOptCluster cluster, List inpCtxLst, boolean flattenExpr) { this.cluster = cluster; this.inputCtxs = ImmutableList. builder().addAll(inpCtxLst).build(); this.flattenExpr = flattenExpr; + this.outerRR = null; + this.outerNameToPosMap = null; } public RexNode convert(ExprNodeDesc expr) throws SemanticException { @@ -144,12 +167,44 @@ public RexNode convert(ExprNodeDesc expr) throws SemanticException { return convert((ExprNodeColumnDesc) expr); } else if (expr instanceof ExprNodeFieldDesc) { return convert((ExprNodeFieldDesc) expr); + } else if(expr instanceof ExprNodeSubQueryDesc) { + return convert((ExprNodeSubQueryDesc) expr); } else { throw new RuntimeException("Unsupported Expression"); } // TODO: handle ExprNodeColumnListDesc } + private RexNode convert(final ExprNodeSubQueryDesc subQueryDesc) throws SemanticException { + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.IN) + { + /* + * Check.5.h :: For In and Not In the SubQuery must implicitly or + * explicitly only contain one select item. 
+ */ + if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) + { + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "SubQuery can contain only 1 item in Select List.")); + } + //create RexNode for LHS + RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs()); + + //create RexSubQuery node + RexNode rexSubQuery = RexSubQuery.in(subQueryDesc.getRexSubQuery(), ImmutableList.of(rexNodeLhs) ); + return rexSubQuery; + } + else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.EXISTS) + { + RexNode subQueryNode = RexSubQuery.exists(subQueryDesc.getRexSubQuery()); + return subQueryNode; + } + else { + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "Currently only IN and EXISTS type of subqueries are supported")); + } + } + private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException { RexNode rexNode = convert(fieldDesc.getDesc()); if (rexNode instanceof RexCall) { @@ -420,7 +475,7 @@ private static boolean checkForStatefulFunctions(List list) { private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException { InputCtx ctxLookingFor = null; - if (inputCtxs.size() == 1) { + if (inputCtxs.size() == 1 && inputCtxs.get(0).hiveRR == null) { ctxLookingFor = inputCtxs.get(0); } else { String tableAlias = col.getTabAlias(); @@ -443,7 +498,21 @@ private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException { } protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException { + //if this is co-rrelated we need to make RexCorrelVariable(with id and type) + // id and type should be retrieved from outerRR InputCtx ic = getInputCtx(col); + if(ic == null) { + // we have correlated column, build data type from outer rr + RelDataType rowType = TypeConverter.getType(cluster, this.outerRR, null); + if (this.outerNameToPosMap.get(col.getColumn()) == null) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN_NAME.getMsg(col.getColumn())); + } + 
+ int pos = this.outerNameToPosMap.get(col.getColumn()); + CorrelationId colCorr = new CorrelationId(this.correlatedId); + RexNode corExpr = cluster.getRexBuilder().makeCorrel(rowType, colCorr); + return cluster.getRexBuilder().makeFieldAccess(corExpr, pos); + } int pos = ic.hiveNameToPosMap.get(col.getColumn()); return cluster.getRexBuilder().makeInputRef( ic.calciteInpDataType.getFieldList().get(pos).getType(), pos + ic.offsetInCalciteSchema); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f1f3bf9..a9758be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -24,9 +24,11 @@ import java.math.BigDecimal; import java.util.AbstractMap.SimpleEntry; import java.util.ArrayList; +import java.util.ArrayDeque; import java.util.Arrays; import java.util.BitSet; import java.util.Collections; +import java.util.Deque ; import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; @@ -91,6 +93,7 @@ import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.schema.SchemaPlus; @@ -135,6 +138,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; @@ -191,6 +195,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSubQueryRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; @@ -961,6 +966,8 @@ private RowResolver genRowResolver(Operator op, QB qb) { private final Map partitionCache; private final ColumnAccessInfo columnAccessInfo; private Map viewProjectToTableSchema; + private int subqueryId; //correlated vars across subqueries within same query needs to have different ID + // this will be used in RexNodeConverter to create cor var // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or // just last one. @@ -977,6 +984,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu RelNode calciteGenPlan = null; RelNode calcitePreCboPlan = null; RelNode calciteOptimizedPlan = null; + subqueryId = -1; /* * recreate cluster, so that it picks up the additional traitDef @@ -1001,7 +1009,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // 1. 
Gen Calcite Plan perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); try { - calciteGenPlan = genLogicalPlan(getQB(), true); + calciteGenPlan = genLogicalPlan(getQB(), true, null, null); resultSchema = SemanticAnalyzer.convertRowSchemaToResultSetSchema( relToHiveRR.get(calciteGenPlan), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); @@ -1025,6 +1033,15 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // Create executor Executor executorProvider = new HiveRexExecutorImpl(cluster); + //Remove subquery + LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); + calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null, + HiveSubQueryRemoveRule.FILTER); + LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); + + calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan); + LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan)); + // 2. 
Apply pre-join order optimizations calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, mdProvider.getMetadataProvider(), executorProvider); @@ -1938,8 +1955,9 @@ private TableType obtainTableType(Table tabMetaData) { } private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, + ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean useCaching) throws SemanticException { - ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel), useCaching); + ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel), outerRR, null, useCaching); if (filterCondn instanceof ExprNodeConstantDesc && !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { // queries like select * from t1 where 'foo'; @@ -1955,7 +1973,7 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap .get(srcRel); RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(), - hiveColNameCalcitePosMap, 0, true).convert(filterCondn); + outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), outerRR, 0, true, subqueryId).convert(filterCondn); RexNode factoredFilterExpr = RexUtil .pullFactors(cluster.getRexBuilder(), convertedFilterExpr); RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), @@ -1967,66 +1985,43 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, return filterRel; } - private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, - Map aliasToRel, boolean forHavingClause) throws SemanticException { - /* - * Handle Subquery predicates. - * - * Notes (8/22/14 hb): Why is this a copy of the code from {@link - * #genFilterPlan} - for now we will support the same behavior as non CBO - * route. - but plan to allow nested SubQueries(Restriction.9.m) and - * multiple SubQuery expressions(Restriction.8.m). 
This requires use to - * utilize Calcite's Decorrelation mechanics, and for Calcite to fix/flush - * out Null semantics(CALCITE-373) - besides only the driving code has - * been copied. Most of the code which is SubQueryUtils and QBSubQuery is - * reused. - */ - int numSrcColumns = srcRel.getRowType().getFieldCount(); - List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); - if (subQueriesInOriginalTree.size() > 0) { + private void subqueryRestritionCheck(QB qb, ASTNode searchCond, RelNode srcRel, boolean forHavingClause, Map aliasToRel ) throws SemanticException { + List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); + if (subQueriesInOriginalTree.size() > 0) { /* * Restriction.9.m :: disallow nested SubQuery expressions. */ - if (qb.getSubQueryPredicateDef() != null) { - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported.")); - } + if (qb.getSubQueryPredicateDef() != null) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported.")); + } /* * Restriction.8.m :: We allow only 1 SubQuery expression per Query. */ - if (subQueriesInOriginalTree.size() > 1) { - - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); - } + if (subQueriesInOriginalTree.size() > 1) { - /* - * Clone the Search AST; apply all rewrites on the clone. 
- */ - ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); - List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); - - RowResolver inputRR = relToHiveRR.get(srcRel); - RowResolver outerQBRR = inputRR; - ImmutableMap outerQBPosMap = relToHiveColNameCalcitePosMap.get(srcRel); - - for (int i = 0; i < subQueries.size(); i++) { - ASTNode subQueryAST = subQueries.get(i); - ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); + } + //we do not care about the transformation or rewriting of AST which following statement does + // we only care about the restriction checks they perform. + // We plan to get rid of these restrictions later int sqIdx = qb.incrNumSubQueryPredicates(); + ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(0); + + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); + List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); + ASTNode subQueryAST = subQueries.get(0); clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, - originalSubQueryAST, ctx); + originalSubQueryAST, ctx); + + RowResolver inputRR = relToHiveRR.get(srcRel); - if (!forHavingClause) { - qb.setWhereClauseSubQueryPredicate(subQuery); - } else { - qb.setHavingClauseSubQueryPredicate(subQuery); - } String havingInputAlias = null; if (forHavingClause) { @@ -2035,78 +2030,72 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, } subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, - aliasToRel.keySet()); - - QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); - qbSQ.setSubQueryDef(subQuery.getSubQuery()); - Phase1Ctx ctx_1 = initPhase1Ctx(); - 
doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1, null); - getMetaData(qbSQ); - RelNode subQueryRelNode = genLogicalPlan(qbSQ, false); - aliasToRel.put(subQuery.getAlias(), subQueryRelNode); - RowResolver sqRR = relToHiveRR.get(subQueryRelNode); - - /* - * Check.5.h :: For In and Not In the SubQuery must implicitly or - * explicitly only contain one select item. - */ - if (subQuery.getOperator().getType() != SubQueryType.EXISTS - && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS - && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) { - throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST, - "SubQuery can contain only 1 item in Select List.")); - } - - /* - * If this is a Not In SubQuery Predicate then Join in the Null Check - * SubQuery. See QBSubQuery.NotInCheck for details on why and how this - * is constructed. - */ - if (subQuery.getNotInCheck() != null) { - QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); - notInCheck.setSQRR(sqRR); - QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true); - qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery()); - ctx_1 = initPhase1Ctx(); - doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1, null); - getMetaData(qbSQ_nic); - RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic, false); - aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); - srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, - // set explicitly to inner until we figure out SemiJoin use - // notInCheck.getJoinType(), - JoinType.INNER, notInCheck.getJoinConditionAST()); - inputRR = relToHiveRR.get(srcRel); - if (forHavingClause) { - aliasToRel.put(havingInputAlias, srcRel); + aliasToRel.keySet()); + + // Missing Check: Check.5.h :: For In and Not In the SubQuery must implicitly or + // explicitly only contain one select item. 
+ } + } + private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause, Map subQueryToRelNode, + Map aliasToRel) throws SemanticException { + + //disallow subqueries which HIVE doesn't currently support + subqueryRestritionCheck(qb, node, srcRel, forHavingClause, aliasToRel); + Deque stack = new ArrayDeque(); + stack.push(node); + + boolean isSubQuery = false; + + while (!stack.isEmpty()) { + ASTNode next = stack.pop(); + + switch(next.getType()) { + case HiveParser.TOK_SUBQUERY_EXPR: + { + String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates(); + QB qbSQ = new QB(qb.getId(), sbQueryAlias, true); + Phase1Ctx ctx_1 = initPhase1Ctx(); + doPhase1((ASTNode)next.getChild(1), qbSQ, ctx_1, null); + getMetaData(qbSQ); + subqueryId++; + RelNode subQueryRelNode = genLogicalPlan(qbSQ, false, relToHiveColNameCalcitePosMap.get(srcRel), relToHiveRR.get(srcRel)); + subQueryToRelNode.put(next, subQueryRelNode); + isSubQuery = true; + break; } + default: + int childCount = next.getChildCount(); + for(int i = childCount - 1; i >= 0; i--) { + stack.push((ASTNode) next.getChild(i)); + } } - - /* - * Gen Join between outer Operator and SQ op - */ - subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); - srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(), - subQuery.getJoinConditionAST()); - searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); - - srcRel = genFilterRelNode(searchCond, srcRel, forHavingClause); - - /* - * For Not Exists and Not In, add a projection on top of the Left - * Outer Join. 
- */ - if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS - || subQuery.getOperator().getType() != SubQueryType.NOT_IN) { - srcRel = projectLeftOuterSide(srcRel, numSrcColumns); - } - } - relToHiveRR.put(srcRel, outerQBRR); - relToHiveColNameCalcitePosMap.put(srcRel, outerQBPosMap); - return srcRel; } + return isSubQuery; + } + private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, + Map aliasToRel, ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean forHavingClause) throws SemanticException { + + Map subQueryToRelNode = new HashMap<>(); + boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause, subQueryToRelNode, aliasToRel); + if(isSubQuery) { + ExprNodeDesc subQueryExpr = genExprNodeDesc(searchCond, relToHiveRR.get(srcRel), outerRR, subQueryToRelNode, forHavingClause); + + ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap + .get(srcRel); + RexNode convertedFilterLHS = new RexNodeConverter(cluster, srcRel.getRowType(), + outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), outerRR, 0, true, subqueryId).convert(subQueryExpr); - return genFilterRelNode(searchCond, srcRel, forHavingClause); + RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), + srcRel, convertedFilterLHS); + + this.relToHiveColNameCalcitePosMap.put(filterRel, this.relToHiveColNameCalcitePosMap + .get(srcRel)); + relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); + return filterRel; + } + else { + return genFilterRelNode(searchCond, srcRel, outerNameToPosMap, outerRR, forHavingClause); + } } private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException { @@ -2133,14 +2122,14 @@ private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws Sema } private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel, - boolean forHavingClause) throws SemanticException { + ImmutableMap outerNameToPosMap, 
RowResolver outerRR, boolean forHavingClause) throws SemanticException { RelNode filterRel = null; Iterator whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values() .iterator(); if (whereClauseIterator.hasNext()) { filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel, - aliasToRel, forHavingClause); + aliasToRel, outerNameToPosMap, outerRR, forHavingClause); } return filterRel; @@ -3425,7 +3414,7 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { switch (qbexpr.getOpcode()) { case NULLOP: - return genLogicalPlan(qbexpr.getQB(), false); + return genLogicalPlan(qbexpr.getQB(), false, null, null); case UNION: case INTERSECT: case INTERSECTALL: @@ -3440,7 +3429,7 @@ private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { } } - private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException { + private RelNode genLogicalPlan(QB qb, boolean outerMostQB, ImmutableMap outerNameToPosMap, RowResolver outerRR ) throws SemanticException { RelNode srcRel = null; RelNode filterRel = null; RelNode gbRel = null; @@ -3513,7 +3502,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept } // 2. Build Rel for where Clause - filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false); + filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, outerNameToPosMap, outerRR, false); srcRel = (filterRel == null) ? 
srcRel : filterRel; RelNode starSrcRel = srcRel; @@ -3614,7 +3603,7 @@ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map subqueryToRelNode, boolean useCaching) + throws SemanticException { + + TypeCheckCtx tcCtx = new TypeCheckCtx(input, useCaching, false); + tcCtx.setOuterRR(outerRR); + tcCtx.setSubqueryToRelNode(subqueryToRelNode); + return genExprNodeDesc(expr, input, tcCtx); + } + + public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, boolean useCaching) throws SemanticException { return genExprNodeDesc(expr, input, useCaching, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java index 02896ff..fabde53 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java @@ -18,9 +18,11 @@ package org.apache.hadoop.hive.ql.parse; +import org.apache.calcite.rel.RelNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import java.util.Map; /** * This class implements the context information that is used for typechecking @@ -35,6 +37,19 @@ */ private RowResolver inputRR; + /** + * RowResolver of outer query. 
This is used to resolve correlated columns in Filter + * TODO: + * this currently will only be able to resolve reference to parent query's column + * this will not work for references to grand-parent column + */ + private RowResolver outerRR; + + /** + * Map from astnode of a subquery to its logical plan + */ + private Map subqueryToRelNode ; + private final boolean useCaching; private final boolean foldExpr; @@ -104,6 +119,8 @@ public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean foldExpr, this.allowWindowing = allowWindowing; this.allowIndexExpr = allowIndexExpr; this.allowSubQueryExpr = allowSubQueryExpr; + this.outerRR = null; + this.subqueryToRelNode = null; } /** @@ -122,6 +139,36 @@ public RowResolver getInputRR() { } /** + * @param outerRR + * the outerRR to set + */ + public void setOuterRR(RowResolver outerRR) { + this.outerRR = outerRR; + } + + /** + * @return the outerRR + */ + public RowResolver getOuterRR() { + return outerRR; + } + + /** + * @param subqueryToRelNode + * the subqueryToRelNode to set + */ + public void setSubqueryToRelNode(Map subqueryToRelNode) { + this.subqueryToRelNode = subqueryToRelNode; + } + + /** + * @return the subqueryToRelNode + */ + public Map getSubqueryToRelNode() { + return subqueryToRelNode; + } + + /** * @param unparseTranslator * the unparseTranslator to set */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index ace3eaf..2fcf0a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -30,6 +30,7 @@ import java.util.Map; import java.util.Stack; +import org.apache.calcite.rel.RelNode; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.hadoop.hive.common.type.HiveChar; @@ -43,7 +44,6 @@ import
org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; import org.apache.hadoop.hive.ql.lib.GraphWalker; @@ -52,6 +52,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.ExpressionWalker; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; @@ -60,6 +61,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc; import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; @@ -134,7 +136,9 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) ASTNode expr = (ASTNode) nd; TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - if (!ctx.isUseCaching()) { + // bypass only if caching is off and outerRR is null. Otherwise we need to look for expressions in outerRR for + // subqueries e.g. select min(b.value) from table b group by b.key having key in (select .. where a = min(b.value)) + if (!ctx.isUseCaching() && ctx.getOuterRR() == null) { return null; } @@ -147,6 +151,13 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) // If the current subExpression is pre-calculated, as in Group-By etc.
ColumnInfo colInfo = input.getExpression(expr); + + // try outer row resolver + RowResolver outerRR = ctx.getOuterRR(); + if( colInfo == null && outerRR != null) + { + colInfo = outerRR.getExpression(expr); + } if (colInfo != null) { desc = new ExprNodeColumnDesc(colInfo); ASTNode source = input.getExpressionSource(expr); @@ -201,14 +212,14 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) + HiveParser.TOK_INTERVAL_SECOND_LITERAL + "%"), tf.getIntervalExprProcessor()); opRules.put(new RuleRegExp("R7", HiveParser.TOK_TABLE_OR_COL + "%"), tf.getColumnExprProcessor()); - opRules.put(new RuleRegExp("R8", HiveParser.TOK_SUBQUERY_OP + "%"), + opRules.put(new RuleRegExp("R8", HiveParser.TOK_SUBQUERY_EXPR + "%"), tf.getSubQueryExprProcessor()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), opRules, tcCtx); - GraphWalker ogw = new DefaultGraphWalker(disp); + GraphWalker ogw = new ExpressionWalker(disp); // Create a list of top nodes ArrayList topNodes = Lists.newArrayList(expr); @@ -618,6 +629,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, boolean isTableAlias = input.hasTableAlias(tableOrCol); ColumnInfo colInfo = input.get(null, tableOrCol); + // try outer row resolver + if(ctx.getOuterRR() != null && colInfo == null && !isTableAlias) + { + RowResolver outerRR = ctx.getOuterRR(); + isTableAlias = outerRR.hasTableAlias(tableOrCol); + colInfo = outerRR.get(null, tableOrCol); + } + if (isTableAlias) { if (colInfo != null) { if (parent != null && parent.getType() == HiveParser.DOT) { @@ -1158,6 +1177,13 @@ protected ExprNodeDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, } ColumnInfo colInfo = input.get(tableAlias, colName); + // Try outer Row resolver + if(colInfo == null && ctx.getOuterRR() != null) + { + RowResolver outerRR = ctx.getOuterRR(); + colInfo = 
outerRR.get(tableAlias, colName); + } + if (colInfo == null) { ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); return null; @@ -1222,6 +1248,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } + if(expr.getType() == HiveParser.TOK_SUBQUERY_OP || expr.getType() == HiveParser.TOK_QUERY) { + return null; + } + if (expr.getType() == HiveParser.TOK_TABNAME) { return null; } @@ -1379,11 +1409,43 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return desc; } + //TOK_SUBQUERY_EXPR should have either 2 or 3 children + assert(expr.getChildren().size() == 3 || expr.getChildren().size() == 2); + assert(expr.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP); //First child should be operand + + ASTNode subqueryOp = (ASTNode) expr.getChild(0); + + boolean isIN = (subqueryOp.getChild(0).getType() == HiveParser.KW_IN + || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTIN) ; + boolean isEXISTS = (subqueryOp.getChild(0).getType() == HiveParser.KW_EXISTS + || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTEXISTS) ; + + // subqueryToRelNode might be null if subquery expression anywhere other than + // as expected in filter (where/having). 
We should throw an appropriate error + // message + + Map subqueryToRelNode = ctx.getSubqueryToRelNode(); + if(subqueryToRelNode == null) + { + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + " Currently SubQuery expressions are only allowed as Where and Having Clause predicates")); + } + + //For now because subquery is only supported in filter we will create subquery expression of boolean type + if(isEXISTS) { + return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryToRelNode.get(expr), ExprNodeSubQueryDesc.SubqueryType.EXISTS); + } + if(isIN) { + assert(nodeOutputs[2] != null); + ExprNodeDesc lhs = (ExprNodeDesc)nodeOutputs[2]; + return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryToRelNode.get(expr), ExprNodeSubQueryDesc.SubqueryType.IN, lhs); + } + /* * Restriction.1.h :: SubQueries only supported in the SQL Where Clause. */ ctx.setError(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(sqNode, - "Currently SubQuery expressions are only allowed as Where Clause predicates"), + "Currently only IN & EXISTS SubQuery expressions are allowed"), sqNode); return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java new file mode 100644 index 0000000..032d7e2 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.calcite.rel.RelNode; + +/** + * This encapsulates a subquery expression, which consists of + * RelNode for subquery + * type (IN, EXISTS ) + * LHS operand + */ +public class ExprNodeSubQueryDesc extends ExprNodeDesc implements Serializable { + private static final long serialVersionUID = 1L; + + public static enum SubqueryType{ + IN, + EXISTS, + }; + + public static final int IN=1; + public static final int EXISTS=2; + + /** + * RelNode corresponding to subquery + */ + private RelNode rexSubQuery; + private ExprNodeDesc subQueryLhs; + private SubqueryType type; + + public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, SubqueryType type) { + super(typeInfo); + this.rexSubQuery = subQuery; + this.subQueryLhs = null; + this.type = type; + } + public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, SubqueryType type, ExprNodeDesc lhs) { + super(typeInfo); + this.rexSubQuery = subQuery; + this.subQueryLhs = lhs; + this.type = type; + + } + + public SubqueryType getType() { + return type; + } + + public ExprNodeDesc getSubQueryLhs() { + return subQueryLhs; + } + + public RelNode getRexSubQuery() { + return rexSubQuery; + } + + @Override + public ExprNodeDesc clone() { + return new ExprNodeSubQueryDesc(typeInfo, rexSubQuery, type, subQueryLhs); + } + + @Override + public boolean isSame(Object o) { + if (!(o instanceof ExprNodeSubQueryDesc)) { + return false; + } + ExprNodeSubQueryDesc
dest = (ExprNodeSubQueryDesc) o; + if (subQueryLhs != null && dest.getSubQueryLhs() != null) { + if (!subQueryLhs.equals(dest.getSubQueryLhs())) { + return false; + } + } + if (!typeInfo.equals(dest.getTypeInfo())) { + return false; + } + if (!rexSubQuery.equals(dest.getRexSubQuery())) { + return false; + } + if(type != dest.getType()) { + return false; + } + return true; + } +} diff --git a/ql/src/test/queries/clientnegative/subquery_restrictions.q b/ql/src/test/queries/clientnegative/subquery_restrictions.q new file mode 100644 index 0000000..80870d7 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_restrictions.q @@ -0,0 +1,92 @@ +--Restriction.1.h SubQueries only supported in the SQL Where Clause. +select src.key in (select key from src s1 where s1.key > '9') +from src; + +select count(*) +from src +group by src.key in (select key from src s1 where s1.key > '9') ; + +--Restriction.2.h The subquery can only be the RHS of an expression +----curently paser doesn't allow such queries +--select * from part where (select p_size from part) IN (1,2); + +--Restriction.3.m The predicate operators supported are In, Not In, exists and Not exists. +----select * from part where p_brand > (select key from src); + +--Check.4.h For Exists and Not Exists, the Sub Query must have 1 or more correlated predicates. 
+select * from src where exists (select * from part); + +--Check.5.h multiple columns in subquery select +select * from src where src.key in (select * from src s1 where s1.key > '9'); + +--Restriction.6.m The LHS in a SubQuery must have all its Column References be qualified +--This is not restriction anymore + +--Restriction 7.h subquery with or condition +select count(*) +from src +where src.key in (select key from src s1 where s1.key > '9') or src.value is not null +; + +--Restriction.8.m We allow only 1 SubQuery expression per Query +select * from part where p_size IN (select p_size from part) AND p_brand IN (select p_brand from part); + +--Restriction 9.m nested subquery +select * +from part x +where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name)) +; + +--Restriction.10.h In a SubQuery references to Parent Query columns is only supported in the where clause. +select * from part where p_size in (select p.p_size + part.p_size from part p); +select * from part where part.p_size IN (select min(p_size) from part p group by part.p_type); + + +--Restriction.11.m A SubQuery predicate that refers to a Parent Query column must be a valid Join predicate +select * from part where p_size in (select p_size from part p where p.p_type > part.p_type); +select * from part where part.p_size IN (select min(p_size) from part p where NOT(part.p_type = p.p_type)); + + +--Check.12.h SubQuery predicates cannot only refer to Parent Query columns +select * from part where p_name IN (select p_name from part p where part.p_type <> 1); + +--Restriction.13.m In the case of an implied Group By on a correlated Sub- Query, the SubQuery always returns 1 row. For e.g. a count on an empty set is 0, while all other UDAFs return null. Converting such a SubQuery into a Join by computing all Groups in one shot, changes the semantics: the Group By SubQuery output will not contain rows for Groups that don’t exist. 
+select * +from src b +where exists + (select count(*) + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +; + +--Restriction.14.h Correlated Sub Queries cannot contain Windowing clauses. +select p_mfgr, p_name, p_size +from part a +where a.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) + from part b + where a.p_brand = b.p_brand) +; + +--Restriction 15.h all unqualified column references in a SubQuery will resolve to table sources within the SubQuery. +select * +from src +where src.key in (select key from src where key > '9') +; + +---------------------------------------------------------------- +-- Following tests does not fall under any restrictions per-se, they just currently don't work with HIVE +---------------------------------------------------------------- + +-- correlated var which refers to outer query join table +explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_partkey = p.l_partkey) ; + +-- union, not in, corr +explain select * from part where p_name NOT IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part); + +-- union, not in, corr, cor var in both queries +explain select * from part where p_name NOT IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part pp where pp.p_mfgr = part.p_comment); + +-- IN, union, corr +explain select * from part where p_name IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part); diff --git a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q index cbfb5d5..8936073 100644 --- a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q +++ 
b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q @@ -1,5 +1,6 @@ set hive.cbo.returnpath.hiveop=true; set hive.stats.fetch.column.stats=true; +set hive.enable.semijoin.conversion=true; ; set hive.exec.reducers.max = 1; diff --git a/ql/src/test/queries/clientpositive/join31.q b/ql/src/test/queries/clientpositive/join31.q index c79105f..aa17b4d 100644 --- a/ql/src/test/queries/clientpositive/join31.q +++ b/ql/src/test/queries/clientpositive/join31.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.enable.semijoin.conversion=true; -- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key STRING, cnt INT); diff --git a/ql/src/test/queries/clientpositive/multiMapJoin2.q b/ql/src/test/queries/clientpositive/multiMapJoin2.q index cf5dbb0..c66dc66 100644 --- a/ql/src/test/queries/clientpositive/multiMapJoin2.q +++ b/ql/src/test/queries/clientpositive/multiMapJoin2.q @@ -3,6 +3,7 @@ set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org. set hive.auto.convert.join=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=6000; +set hive.enable.semijoin.conversion=true; -- we will generate one MR job. 
EXPLAIN diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q index c01ae70..4610304 100644 --- a/ql/src/test/queries/clientpositive/subquery_in.q +++ b/ql/src/test/queries/clientpositive/subquery_in.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; + -- SORT_QUERY_RESULTS -- non agg, non corr @@ -118,3 +119,103 @@ from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li o where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) ; + + +--where has multiple conjuction +explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340; +select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340; + +--lhs contains non-simple expression +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type); +select * from part where (p_size-1) IN (select min(p_size) from part group by p_type); + +explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type); +select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type); + +--lhs contains non-simple expression, corr +explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name); +select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name); + +-- lhs contains udf expression +explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type); +select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type); + +explain select * from part 
where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); +select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); + +-- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ); +select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ); + +-- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand); +select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand); + +-- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size); +select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size); + +-- where plus having +explain select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ); +select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ); + +-- where with having, correlated +explain select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ); +select 
key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ); + +-- subquery with order by +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand; +select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand; + +--order by with limit +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4; +select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4; + +-- union, uncorr +explain select * from src where key IN (select p_name from part UNION ALL select p_brand from part); +select * from src where key IN (select p_name from part UNION ALL select p_brand from part); + +-- corr, subquery has another subquery in from +explain select p_mfgr, b.p_name, p_size from part b where b.p_name in + (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size; +select p_mfgr, b.p_name, p_size from part b where b.p_name in + (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size; + +-- join in subquery, correlated predicate with only one table +explain select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size); +select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size); + +-- join in subquery, correlated predicate with both inner tables, same outer var +explain select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and 
p.p_size=part.p_size); +select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size); + +-- join in subquery, correlated predicate with both inner tables, different outer var +explain select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_type=part.p_type); + +-- subquery within from +explain select p_partkey from + (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq; +select p_partkey from + (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq; + + +create table tempty(i int); +create table tnull(i int); +insert into tnull values(NULL) , (NULL); + +-- empty inner table, non-null sq key, expected empty result +select * from part where p_size IN (select i from tempty); + +-- empty inner table, null sq key, expected empty result +select * from tnull where i IN (select i from tempty); + +-- null inner table, non-null sq key +select * from part where p_size IN (select i from tnull); + +-- null inner table, null sq key +select * from tnull where i IN (select i from tnull); + +drop table tempty; diff --git a/ql/src/test/queries/clientnegative/subquery_nested_subquery.q b/ql/src/test/queries/clientpositive/subquery_nested_subquery.q similarity index 100% rename from ql/src/test/queries/clientnegative/subquery_nested_subquery.q rename to ql/src/test/queries/clientpositive/subquery_nested_subquery.q diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q index 3f4fb7f..c29e63f 100644 --- a/ql/src/test/queries/clientpositive/subquery_notin.q +++ 
b/ql/src/test/queries/clientpositive/subquery_notin.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; + -- non agg, non corr explain select * @@ -76,10 +77,10 @@ order by p_mfgr, p_size ; -- non agg, non corr, Group By in Parent Query -select li.l_partkey, count(*) -from lineitem li -where li.l_linenumber = 1 and - li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') group by li.l_partkey ; @@ -103,3 +104,127 @@ from T1_v where T1_v.key not in (select T2_v.key from T2_v); select * from T1_v where T1_v.key not in (select T2_v.key from T2_v); + +--where has multiple conjuction +explain select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340; +select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340; + +--lhs contains non-simple expression +explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type); +select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type); + +explain select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type); +select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type); + +--lhs contains non-simple expression, corr +explain select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name); +select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name); + +-- lhs contains udf expression +explain select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type); 
+select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type); + +explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); +select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ); + +-- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ); +select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ); + +-- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type from part where part.p_brand = fpart.brand); +select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type from part where part.p_brand = fpart.brand); + +-- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size); +select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size); + +-- where plus having +explain select key, count(*) from src where value NOT IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ); +select key, count(*) from src where value NOT IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ); + +-- where with having, correlated +explain select key, 
count(*) from src where value NOT IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ); +select key, count(*) from src where value NOT IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ); + +-- subquery with order by +explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand; +select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand; + +--order by with limit +explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand limit 4; +select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand limit 4; + +-- union, uncorr +explain select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part); +select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part); + +explain select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand); +select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand); + +--nullability tests +CREATE TABLE t1 (c1 INT, c2 CHAR(100)); +INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij'); + +CREATE TABLE t2 (c1 INT); +INSERT INTO t2 VALUES (null), (2), (100); + +-- uncorr +explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2); +SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2); + +-- corr +explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2 where t1.c2=t2.c1); +SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1); + +DROP TABLE t1; +DROP TABLE t2; + +-- corr, nullability, should not produce any result +create table t1(a 
int, b int); +insert into t1 values(1,0), (1,0),(1,0); + +create table t2(a int, b int); +insert into t2 values(2,1), (3,1), (NULL,1); + +explain select t1.a from t1 where t1.b NOT IN (select t2.a from t2 where t2.b=t1.a); +select t1.a from t1 where t1.b NOT IN (select t2.a from t2 where t2.b=t1.a); +drop table t1; +drop table t2; + + +-- coor, nullability, should produce result +create table t7(i int, j int); +insert into t7 values(null, 5), (4, 15); + +create table fixOb(i int, j int); +insert into fixOb values(-1, 5), (-1, 15); + +explain select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j); +select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j); + +drop table t7; +drop table fixOb; + +create table t(i int, j int); +insert into t values(1,2), (4,5), (7, NULL); + + +-- case with empty inner result (t1.j=t.j=NULL) and null subquery key(t.j = NULL) +explain select t.i from t where t.j NOT IN (select t1.i from t t1 where t1.j=t.j); +select t.i from t where t.j NOT IN (select t1.i from t t1 where t1.j=t.j); + +-- case with empty inner result (t1.j=t.j=NULL) and non-null subquery key(t.i is never null) +explain select t.i from t where t.i NOT IN (select t1.i from t t1 where t1.j=t.j); +select t.i from t where t.i NOT IN (select t1.i from t t1 where t1.j=t.j); + +-- case with non-empty inner result and null subquery key(t.j is null) +explain select t.i from t where t.j NOT IN (select t1.i from t t1 ); +select t.i from t where t.j NOT IN (select t1.i from t t1 ); + +-- case with non-empty inner result and non-null subquery key(t.i is never null) +explain select t.i from t where t.i NOT IN (select t1.i from t t1 ); +select t.i from t where t.i NOT IN (select t1.i from t t1 ); + +drop table t1; + diff --git a/ql/src/test/queries/clientpositive/subquery_notin_having.q b/ql/src/test/queries/clientpositive/subquery_notin_having.q index 05148df..2cd0bec 100644 --- a/ql/src/test/queries/clientpositive/subquery_notin_having.q +++ 
b/ql/src/test/queries/clientpositive/subquery_notin_having.q @@ -56,3 +56,19 @@ having b.p_mfgr not in having max(p_retailprice) - min(p_retailprice) > 600 ) ; + +--nullability tests +CREATE TABLE t1 (c1 INT, c2 CHAR(100)); +INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij'); + +CREATE TABLE t2 (c1 INT); +INSERT INTO t2 VALUES (null), (2), (100); + +explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2); +SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2); + +explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1); +SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1); + +DROP TABLE t1; +DROP TABLE t2; diff --git a/ql/src/test/queries/clientnegative/subquery_shared_alias.q b/ql/src/test/queries/clientpositive/subquery_shared_alias.q similarity index 100% rename from ql/src/test/queries/clientnegative/subquery_shared_alias.q rename to ql/src/test/queries/clientpositive/subquery_shared_alias.q diff --git a/ql/src/test/results/clientnegative/subquery_in_groupby.q.out b/ql/src/test/results/clientnegative/subquery_in_groupby.q.out index 809bb0a..a546d49 100644 --- a/ql/src/test/results/clientnegative/subquery_in_groupby.q.out +++ b/ql/src/test/results/clientnegative/subquery_in_groupby.q.out @@ -1 +1 @@ -FAILED: SemanticException [Error 10249]: Line 5:37 Unsupported SubQuery Expression ''9'': Currently SubQuery expressions are only allowed as Where Clause predicates +FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates diff --git a/ql/src/test/results/clientnegative/subquery_in_select.q.out b/ql/src/test/results/clientnegative/subquery_in_select.q.out index 3d74132..a546d49 100644 --- a/ql/src/test/results/clientnegative/subquery_in_select.q.out +++ b/ql/src/test/results/clientnegative/subquery_in_select.q.out @@ -1 +1 @@ -FAILED: 
SemanticException [Error 10249]: Line 4:35 Unsupported SubQuery Expression ''9'': Currently SubQuery expressions are only allowed as Where Clause predicates +FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates diff --git a/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out b/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out deleted file mode 100644 index 140b093..0000000 --- a/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out +++ /dev/null @@ -1 +0,0 @@ -FAILED: SemanticException [Error 10249]: Line 3:53 Unsupported SubQuery Expression 'p_name': Nested SubQuery expressions are not supported. diff --git a/ql/src/test/results/clientnegative/subquery_restrictions.q.out b/ql/src/test/results/clientnegative/subquery_restrictions.q.out new file mode 100644 index 0000000..a546d49 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_restrictions.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates diff --git a/ql/src/test/results/clientpositive/constant_prop_3.q.out b/ql/src/test/results/clientpositive/constant_prop_3.q.out index 58f1065..066ed07 100644 --- a/ql/src/test/results/clientpositive/constant_prop_3.q.out +++ b/ql/src/test/results/clientpositive/constant_prop_3.q.out @@ -88,7 +88,7 @@ POSTHOOK: query: analyze table supplier_hive compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@supplier_hive #### A masked pattern was here #### -Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select p_brand, p_type, @@ -154,10 +154,11 @@ POSTHOOK: type: QUERY STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-7 - Stage-3 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2, Stage-8 Stage-4 depends on stages: Stage-3 Stage-5 depends on stages: Stage-4 Stage-7 is a root stage + Stage-8 is a root stage Stage-0 depends on stages: Stage-5 STAGE PLANS: @@ -223,7 +224,8 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -231,8 +233,8 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -248,23 +250,15 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string), _col4 (type: string), _col5 (type: int) + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: bigint), _col7 (type: bigint) TableScan - alias: supplier_hive - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (s_comment like '%Customer%Complaints%') (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: s_suppkey 
(type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator condition map: @@ -272,21 +266,21 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col7 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9 + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col7 is null (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col6 = 0) or (_col9 is null and _col1 is not null and (_col7 >= _col6))) (type: boolean) + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int) outputColumnNames: _col3, _col4, _col5, _col1 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT _col1) keys: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -302,14 +296,14 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int) sort order: ++++ Map-reduce 
partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col3:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -324,15 +318,15 @@ STAGE PLANS: Reduce Output Operator key expressions: _col3 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: -+++ - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -345,36 +339,69 @@ STAGE PLANS: alias: supplier_hive Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((s_comment like '%Customer%Complaints%') and s_suppkey is null) (type: boolean) 
+ predicate: (s_comment like '%Customer%Complaints%') (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator + expressions: s_suppkey (type: int) + outputColumnNames: s_suppkey Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count() + aggregations: count(), count(s_suppkey) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: supplier_hive + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (s_comment like '%Customer%Complaints%') (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: s_suppkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out index d1016ad..d4ccb8c 100644 --- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out +++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out @@ -80,10 +80,114 @@ WHERE li.l_linenumber = 1 AND li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' AND l_linenumber = li.l_linenumber) 
POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: li + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_linenumber (type: int) + outputColumnNames: l_linenumber + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_linenumber (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_shipmode = 'AIR') (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_linenumber (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Map Reduce Map Operator 
Tree: @@ -91,7 +195,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean) + predicate: (l_linenumber = 1) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) @@ -104,29 +208,15 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: int), 1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: int), 1 (type: int) 1 _col0 (type: int), _col1 
(type: int) diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index f993cf0..d3acbcd 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -3297,17 +3297,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 7 <- Union 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 6 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -3340,7 +3340,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: srcpart @@ -3365,7 +3365,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3402,35 +3402,45 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 
1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reducer 8 + Target Vertex: Map 1 + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3438,34 +3448,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 Union 6 Vertex: Union 6 @@ -3505,17 +3497,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 7 <- Union 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 6 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data 
size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -3548,7 +3540,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: srcpart @@ -3573,7 +3565,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3612,35 +3604,45 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition 
columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reducer 8 + Target Vertex: Map 1 + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3648,34 +3650,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: 
COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 Union 6 Vertex: Union 6 @@ -3716,9 +3700,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 9 (CONTAINS) + Reducer 10 <- Union 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 9 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) + Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) #### A masked pattern was here #### @@ -3727,7 +3712,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ds (type: string) @@ -3741,7 +3725,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 10 + Map 11 Map Operator Tree: TableScan alias: srcpart @@ -3765,7 +3749,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ds (type: string) @@ -3799,57 +3782,67 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 11 + Reducer 10 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: 
mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Target Vertex: Map 1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 5 + Target Vertex: Map 5 + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3868,7 +3861,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3902,49 +3895,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - 
Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 5 Union 3 Vertex: Union 3 Union 9 @@ -5688,45 +5648,29 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Union 5 (CONTAINS) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 4 
(CONTAINS) + Reducer 5 <- Map 1 (BROADCAST_EDGE), Union 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Union 5 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -5746,7 +5690,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: srcpart @@ -5766,32 +5710,42 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: max(VALUE._col0) mode: 
mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 1 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -5802,22 +5756,22 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 
Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reducer 7 + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5825,36 +5779,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic 
stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Union 5 - Vertex: Union 5 + Union 4 + Vertex: Union 4 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 70ec02f..43d7e52 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -1803,46 +1803,113 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 4 <- Map 3 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_14] - Select Operator [SEL_13] (rows=1 width=178) + File Output Operator [FS_52] + Select Operator [SEL_51] (rows=65 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_12] (rows=1 width=269) - predicate:_col3 is null - Merge Join Operator [MERGEJOIN_17] (rows=500 width=269) - Conds:RS_9._col1=RS_10._col1(Left Outer),Output:["_col0","_col1","_col3"] + Filter Operator [FIL_50] (rows=65 width=198) + predicate:CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col6 is not null) THEN (false) WHEN ((_col4 < _col3)) 
THEN (false) ELSE (true) END + Merge Join Operator [MERGEJOIN_67] (rows=130 width=198) + Conds:RS_46._col1=RS_47._col0(Left Outer),RS_46._col1=RS_48._col0(Left Outer),Output:["_col0","_col1","_col3","_col4","_col6"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + SHUFFLE [RS_46] PartitionCols:_col1 Select Operator [SEL_1] (rows=500 width=178) Output:["_col0","_col1"] TableScan [TS_0] (rows=500 width=178) default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=83 width=178) - Output:["_col1"] - Group By Operator [GBY_7] (rows=83 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - PartitionCols:_col0, _col1 - Group By Operator [GBY_5] (rows=83 width=178) - Output:["_col0","_col1"],keys:value, key - Select Operator [SEL_4] (rows=166 width=178) - Output:["value","key"] - Filter Operator [FIL_16] (rows=166 width=178) - predicate:(value > 'val_2') - TableScan [TS_2] (rows=500 width=178) - default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_48] + PartitionCols:_col0 + Select Operator [SEL_45] (rows=56 width=95) + Output:["_col0","_col1"] + Group By Operator [GBY_44] (rows=56 width=91) + Output:["_col0"],keys:_col1 + Select Operator [SEL_40] (rows=83 width=178) + Output:["_col1"] + Group By Operator [GBY_39] (rows=83 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_38] + PartitionCols:_col0 + Group By Operator [GBY_37] (rows=83 width=178) + Output:["_col0","_col1"],keys:_col2, _col0 + Select Operator [SEL_36] (rows=166 width=178) + Output:["_col2","_col0"] + Merge Join Operator [MERGEJOIN_66] (rows=166 width=178) + Conds:RS_33._col1=RS_34._col0(Inner),Output:["_col0","_col2"] + <-Map 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_33] + PartitionCols:_col1 + Select Operator [SEL_26] (rows=166 
width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_63] (rows=166 width=178) + predicate:(value > 'val_2') + TableScan [TS_24] (rows=500 width=178) + default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 12 [SIMPLE_EDGE] llap + SHUFFLE [RS_34] + PartitionCols:_col0 + Group By Operator [GBY_31] (rows=214 width=91) + Output:["_col0"],keys:KEY._col0 + <-Map 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_30] + PartitionCols:_col0 + Group By Operator [GBY_29] (rows=214 width=91) + Output:["_col0"],keys:value + TableScan [TS_27] (rows=500 width=91) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_47] + PartitionCols:_col0 + Group By Operator [GBY_22] (rows=56 width=107) + Output:["_col0","_col1","_col2"],aggregations:["count()","count(_col0)"],keys:_col1 + Select Operator [SEL_18] (rows=83 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_17] (rows=83 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=83 width=178) + Output:["_col0","_col1"],keys:_col2, _col0 + Select Operator [SEL_14] (rows=166 width=178) + Output:["_col2","_col0"] + Merge Join Operator [MERGEJOIN_65] (rows=166 width=178) + Conds:RS_11._col1=RS_12._col0(Inner),Output:["_col0","_col2"] + <-Map 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col1 + Select Operator [SEL_4] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_61] (rows=166 width=178) + predicate:(value > 'val_2') + TableScan [TS_2] (rows=500 width=178) + default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col0 + Group By Operator [GBY_9] (rows=214 width=91) + Output:["_col0"],keys:KEY._col0 + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_8] + PartitionCols:_col0 + Group By Operator [GBY_7] (rows=214 width=91) + 
Output:["_col0"],keys:value + TableScan [TS_5] (rows=500 width=91) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: explain select * from src_cbo b @@ -1865,32 +1932,66 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage +Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_14] - Select Operator [SEL_13] (rows=1 width=178) + File Output Operator [FS_56] + Select Operator [SEL_55] (rows=1 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_12] (rows=1 width=265) - predicate:_col3 is null - Merge Join Operator [MERGEJOIN_17] (rows=250 width=265) - Conds:RS_9._col0, _col1=RS_10._col1, _col0(Left Outer),Output:["_col0","_col1","_col3"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col1, _col0 - Select Operator [SEL_8] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=166 width=178) - predicate:(value > 'val_12') - TableScan [TS_6] (rows=500 width=178) - default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Filter Operator [FIL_54] (rows=1 width=198) + predicate:CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END + Merge Join Operator [MERGEJOIN_71] (rows=1 width=198) + Conds:RS_50._col0, _col1=RS_51._col0, _col1(Left Outer),RS_50._col0, _col1=RS_52._col0, _col1(Left Outer),Output:["_col0","_col1","_col4","_col5","_col8"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE 
[RS_52] + PartitionCols:_col0, _col1 + Select Operator [SEL_49] (rows=1 width=182) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_48] (rows=1 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_47] + PartitionCols:_col0, _col1 + Group By Operator [GBY_46] (rows=1 width=178) + Output:["_col0","_col1"],keys:_col2, _col3 + Merge Join Operator [MERGEJOIN_70] (rows=1 width=178) + Conds:RS_42._col0, _col1=RS_43._col0, _col1(Inner),Output:["_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_42] + PartitionCols:_col0, _col1 + Select Operator [SEL_30] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_67] (rows=166 width=178) + predicate:(value > 'val_12') + TableScan [TS_28] (rows=500 width=178) + default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_43] + PartitionCols:_col0, _col1 + Group By Operator [GBY_40] (rows=250 width=178) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_35] (rows=250 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 12 [SIMPLE_EDGE] llap + SHUFFLE [RS_34] + PartitionCols:_col0, _col1 + Group By Operator [GBY_33] (rows=250 width=178) + Output:["_col0","_col1"],keys:key, value + TableScan [TS_31] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + SHUFFLE [RS_50] PartitionCols:_col0, _col1 Group By Operator [GBY_4] (rows=250 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 @@ -1903,6 +2004,43 @@ Stage-0 Output:["key","value"] TableScan [TS_0] (rows=500 width=178) default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_51] + PartitionCols:_col0, _col1 + Group By Operator [GBY_26] (rows=1 width=194) + 
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1 + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] + PartitionCols:_col0, _col1 + Group By Operator [GBY_24] (rows=1 width=194) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()","count(_col0)"],keys:_col2, _col3 + Select Operator [SEL_23] (rows=1 width=265) + Output:["_col2","_col3","_col0"] + Merge Join Operator [MERGEJOIN_69] (rows=1 width=265) + Conds:RS_20._col0, _col1=RS_21._col0, _col1(Inner),Output:["_col0","_col2","_col3"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] + PartitionCols:_col0, _col1 + Select Operator [SEL_8] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_65] (rows=166 width=178) + predicate:(value > 'val_12') + TableScan [TS_6] (rows=500 width=178) + default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col0, _col1 + Group By Operator [GBY_18] (rows=250 width=178) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_13] (rows=250 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_11] (rows=250 width=178) + Output:["_col0","_col1"],keys:key, value + TableScan [TS_9] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: create view cv1 as select * @@ -1933,36 +2071,59 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=2 width=178) - Conds:RS_8._col0, _col1=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1"] + File Output Operator [FS_23] + Merge Join Operator [MERGEJOIN_33] (rows=1 width=178) + Conds:RS_19._col0, _col1=RS_20._col0, _col1(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] + SHUFFLE [RS_19] PartitionCols:_col0, _col1 - Select Operator [SEL_2] (rows=166 width=178) + Select Operator [SEL_1] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=166 width=178) - predicate:((value > 'val_9') and key is not null) - TableScan [TS_0] (rows=500 width=178) - default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"} - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + TableScan [TS_0] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"} + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=83 width=178) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=166 width=178) - predicate:((value > 'val_9') and key is not null) - TableScan [TS_3] (rows=500 width=178) - default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Group By Operator [GBY_17] (rows=1 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=1 width=178) + 
Output:["_col0","_col1"],keys:_col2, _col3 + Merge Join Operator [MERGEJOIN_32] (rows=1 width=178) + Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"] + <-Map 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Select Operator [SEL_4] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_30] (rows=166 width=178) + predicate:(value > 'val_9') + TableScan [TS_2] (rows=500 width=178) + default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_9] (rows=250 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_8] + PartitionCols:_col0, _col1 + Group By Operator [GBY_7] (rows=250 width=178) + Output:["_col0","_col1"],keys:key, value + TableScan [TS_5] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"} PREHOOK: query: explain select * from (select * @@ -1985,36 +2146,59 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=2 width=178) - Conds:RS_8._col0, _col1=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1"] + File Output Operator [FS_23] + Merge Join Operator [MERGEJOIN_33] (rows=1 width=178) + Conds:RS_19._col0, _col1=RS_20._col0, _col1(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] + SHUFFLE [RS_19] PartitionCols:_col0, _col1 - Select Operator [SEL_2] (rows=166 width=178) + Select Operator [SEL_1] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=166 width=178) - predicate:((value > 'val_9') and key is not null) - TableScan [TS_0] (rows=500 width=178) - default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + TableScan [TS_0] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=83 width=178) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=166 width=178) - predicate:((value > 'val_9') and key is not null) - TableScan [TS_3] (rows=500 width=178) - default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Group By Operator [GBY_17] (rows=1 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=1 width=178) + Output:["_col0","_col1"],keys:_col2, _col3 + Merge Join Operator 
[MERGEJOIN_32] (rows=1 width=178) + Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"] + <-Map 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Select Operator [SEL_4] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_30] (rows=166 width=178) + predicate:(value > 'val_9') + TableScan [TS_2] (rows=500 width=178) + default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_9] (rows=250 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_8] + PartitionCols:_col0, _col1 + Group By Operator [GBY_7] (rows=250 width=178) + Output:["_col0","_col1"],keys:key, value + TableScan [TS_5] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select * from src_cbo @@ -2027,36 +2211,38 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=166 width=178) - Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"] + File Output Operator [FS_13] + Merge Join Operator [MERGEJOIN_18] (rows=168 width=178) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] + SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=166 width=178) + Select Operator [SEL_1] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=166 width=178) - predicate:(key > '9') - TableScan [TS_0] (rows=500 width=178) - default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + TableScan [TS_0] (rows=500 width=178) + default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] PartitionCols:_col0 Group By Operator [GBY_7] (rows=69 width=87) - Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=166 width=87) - Output:["_col0"] - Filter Operator [FIL_16] (rows=166 width=87) - predicate:(key > '9') - TableScan [TS_3] (rows=500 width=87) - default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Output:["_col0"],keys:KEY._col0 + <-Map 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_6] + PartitionCols:_col0 + Group By Operator [GBY_5] (rows=69 width=87) + Output:["_col0"],keys:key + Filter Operator [FIL_17] (rows=166 width=87) + predicate:(key > '9') + TableScan [TS_2] (rows=500 width=87) + default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] PREHOOK: query: explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey @@ -2071,59 
+2257,109 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 10 <- Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 13 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_22] - Select Operator [SEL_21] (rows=4 width=8) + Reducer 4 llap + File Output Operator [FS_46] + Select Operator [SEL_45] (rows=5 width=8) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_32] (rows=4 width=8) - Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col2","_col4"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_31] (rows=13 width=8) - Conds:RS_15._col0, 1=RS_16._col0, _col1(Left Semi),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] - PartitionCols:_col0, 1 - Select Operator [SEL_2] (rows=17 width=16) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_28] (rows=17 width=16) - predicate:((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) - TableScan [TS_0] (rows=100 width=16) - default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_16] - PartitionCols:_col0, _col1 - Group By Operator [GBY_14] (rows=4 width=8) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=14 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_29] (rows=14 width=96) - predicate:((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is 
not null) - TableScan [TS_3] (rows=100 width=96) - default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] - PartitionCols:_col0 - Group By Operator [GBY_11] (rows=50 width=4) - Output:["_col0"],keys:KEY._col0 + Merge Join Operator [MERGEJOIN_67] (rows=5 width=8) + Conds:RS_42._col1, _col4=RS_43._col0, _col1(Inner),Output:["_col0","_col3"] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_42] + PartitionCols:_col1, _col4 + Merge Join Operator [MERGEJOIN_64] (rows=5 width=16) + Conds:RS_39._col0=RS_40._col1(Inner),Output:["_col0","_col1","_col3","_col4"] <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] + SHUFFLE [RS_40] + PartitionCols:_col1 + Select Operator [SEL_9] (rows=17 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_60] (rows=17 width=16) + predicate:((l_linenumber = 1) and l_partkey is not null) + TableScan [TS_7] (rows=100 width=16) + default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_39] PartitionCols:_col0 - Group By Operator [GBY_9] (rows=50 width=4) - Output:["_col0"],keys:l_partkey - Filter Operator [FIL_30] (rows=100 width=4) - predicate:l_partkey is not null - TableScan [TS_6] (rows=100 width=4) - default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"] + Group By Operator [GBY_5] (rows=50 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col0 + Group By Operator [GBY_3] (rows=50 width=4) + Output:["_col0"],keys:l_partkey + Filter Operator [FIL_59] (rows=100 width=4) + predicate:l_partkey is not null + TableScan [TS_0] (rows=100 width=4) + default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_43] + PartitionCols:_col0, _col1 + Group By Operator [GBY_37] (rows=4 width=8) + 
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=4 width=8) + Output:["_col0","_col1"],keys:_col0, _col3 + Merge Join Operator [MERGEJOIN_66] (rows=14 width=8) + Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col3"] + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_31] + PartitionCols:_col1 + Select Operator [SEL_12] (rows=14 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_61] (rows=14 width=96) + predicate:(l_shipmode = 'AIR') + TableScan [TS_10] (rows=100 width=96) + default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"] + <-Reducer 12 [SIMPLE_EDGE] llap + SHUFFLE [RS_32] + PartitionCols:_col0 + Group By Operator [GBY_29] (rows=3 width=4) + Output:["_col0"],keys:KEY._col0 + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_28] + PartitionCols:_col0 + Group By Operator [GBY_27] (rows=3 width=4) + Output:["_col0"],keys:_col2 + Merge Join Operator [MERGEJOIN_65] (rows=34 width=4) + Conds:RS_23._col0=RS_24._col0(Inner),Output:["_col2"] + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_24] + PartitionCols:_col0 + Select Operator [SEL_22] (rows=100 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_63] (rows=100 width=8) + predicate:l_partkey is not null + TableScan [TS_20] (rows=100 width=8) + default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey","l_linenumber"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col0 + Group By Operator [GBY_18] (rows=50 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=50 width=4) + Output:["_col0"],keys:l_partkey + Filter Operator [FIL_62] (rows=100 width=4) + predicate:l_partkey is not null + TableScan [TS_13] (rows=100 width=4) + default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"] PREHOOK: query: explain select 
key, value, count(*) from src_cbo b @@ -2140,72 +2376,74 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_31] - Merge Join Operator [MERGEJOIN_44] (rows=34 width=186) - Conds:RS_27._col2=RS_28._col0(Left Semi),Output:["_col0","_col1","_col2"] + File Output Operator [FS_33] + Merge Join Operator [MERGEJOIN_46] (rows=34 width=186) + Conds:RS_29._col2=RS_30._col0(Inner),Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_27] + SHUFFLE [RS_29] PartitionCols:_col2 - Filter Operator [FIL_37] (rows=83 width=186) - predicate:_col2 is not null - Group By Operator [GBY_14] (rows=83 width=186) - Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] - PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=83 width=186) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 - Merge Join Operator [MERGEJOIN_43] (rows=166 width=178) - Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=166 width=178) - predicate:(key > '8') - TableScan [TS_0] (rows=500 width=178) - default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col0 - Group By Operator [GBY_7] 
(rows=69 width=87) - Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=166 width=87) - Output:["_col0"] - Filter Operator [FIL_39] (rows=166 width=87) + Group By Operator [GBY_15] (rows=84 width=186) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0, _col1 + Group By Operator [GBY_13] (rows=84 width=186) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 + Merge Join Operator [MERGEJOIN_45] (rows=168 width=178) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] + PartitionCols:_col0 + Group By Operator [GBY_7] (rows=69 width=87) + Output:["_col0"],keys:KEY._col0 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_6] + PartitionCols:_col0 + Group By Operator [GBY_5] (rows=69 width=87) + Output:["_col0"],keys:key + Filter Operator [FIL_41] (rows=166 width=87) predicate:(key > '8') - TableScan [TS_3] (rows=500 width=87) + TableScan [TS_2] (rows=500 width=87) default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_26] (rows=34 width=8) - Output:["_col0"],keys:_col0 - Select Operator [SEL_24] (rows=69 width=8) - Output:["_col0"] - Filter Operator [FIL_40] (rows=69 width=8) - predicate:_col1 is not null - Select Operator [SEL_42] (rows=69 width=8) + Group By Operator [GBY_27] (rows=34 width=8) + Output:["_col0"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_26] + PartitionCols:_col0 + Group By Operator [GBY_25] (rows=34 width=8) + 
Output:["_col0"],keys:_col1 + Select Operator [SEL_44] (rows=69 width=8) Output:["_col1"] Group By Operator [GBY_22] (rows=69 width=95) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 6 [SIMPLE_EDGE] llap + <-Map 7 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0 Group By Operator [GBY_20] (rows=69 width=95) Output:["_col0","_col1"],aggregations:["count()"],keys:key - Filter Operator [FIL_41] (rows=166 width=87) + Filter Operator [FIL_43] (rows=166 width=87) predicate:(key > '9') TableScan [TS_17] (rows=500 width=87) default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] @@ -2226,8 +2464,9 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Stage-0 Fetch Operator @@ -2236,41 +2475,42 @@ Stage-0 Reducer 3 llap File Output Operator [FS_21] Merge Join Operator [MERGEJOIN_26] (rows=6 width=227) - Conds:RS_17._col1=RS_18._col0(Left Semi),Output:["_col0","_col1","_col2"] + Conds:RS_17._col1=RS_18._col0(Inner),Output:["_col0","_col1","_col2"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_17] PartitionCols:_col1 - Select Operator [SEL_6] (rows=13 width=227) + Select Operator [SEL_5] (rows=13 width=227) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_5] (rows=13 width=227) + Group By Operator [GBY_4] (rows=13 width=227) Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] + SHUFFLE [RS_3] PartitionCols:_col0, _col1 - Group By Operator [GBY_3] (rows=13 width=295) + Group By Operator [GBY_2] (rows=13 width=295) Output:["_col0","_col1","_col2"],aggregations:["avg(p_size)"],keys:p_name, p_mfgr - Filter Operator [FIL_24] (rows=26 width=223) - predicate:p_name is not null - TableScan [TS_0] (rows=26 width=223) - 
default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] - <-Reducer 5 [SIMPLE_EDGE] llap + TableScan [TS_0] (rows=26 width=223) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] + <-Reducer 6 [SIMPLE_EDGE] llap SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=13 width=184) - Output:["_col0"],keys:_col0 - Select Operator [SEL_11] (rows=26 width=184) - Output:["_col0"] - Filter Operator [FIL_25] (rows=26 width=491) - predicate:first_value_window_0 is not null - PTF Operator [PTF_10] (rows=26 width=491) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_9] (rows=26 width=491) - Output:["_col1","_col2","_col5"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:p_mfgr - TableScan [TS_7] (rows=26 width=223) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"] + Group By Operator [GBY_15] (rows=13 width=184) + Output:["_col0"],keys:KEY._col0 + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=13 width=184) + Output:["_col0"],keys:_col0 + Select Operator [SEL_10] (rows=26 width=491) + Output:["_col0"] + PTF Operator [PTF_9] (rows=26 width=491) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] + Select Operator [SEL_8] (rows=26 width=491) + Output:["_col1","_col2","_col5"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:p_mfgr + TableScan [TS_6] (rows=26 width=223) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"] PREHOOK: query: explain select * from src_cbo @@ -2290,62 +2530,66 @@ Plan optimized by CBO. 
Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_25] - Select Operator [SEL_24] (rows=1 width=178) + File Output Operator [FS_27] + Select Operator [SEL_26] (rows=250 width=178) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] - Select Operator [SEL_22] (rows=1 width=178) + SHUFFLE [RS_25] + Select Operator [SEL_24] (rows=250 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_21] (rows=1 width=265) - predicate:_col3 is null - Merge Join Operator [MERGEJOIN_29] (rows=500 width=265) - Conds:RS_18._col0=RS_19._col0(Left Outer),Output:["_col0","_col1","_col3"] - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=166 width=87) - Output:["_col0"] - Filter Operator [FIL_27] (rows=166 width=87) - predicate:(key > '2') - TableScan [TS_12] (rows=500 width=87) - default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Filter Operator [FIL_23] (rows=250 width=198) + predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) + Merge Join Operator [MERGEJOIN_32] (rows=500 width=198) + Conds:RS_20._col0=RS_21._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] + SHUFFLE [RS_20] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_28] (rows=500 width=178) - Conds:(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_31] (rows=500 width=194) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] + SHUFFLE [RS_17] Select 
Operator [SEL_1] (rows=500 width=178) Output:["_col0","_col1"] TableScan [TS_0] (rows=500 width=178) default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_16] - Select Operator [SEL_11] (rows=1 width=8) - Filter Operator [FIL_10] (rows=1 width=8) - predicate:(_col0 = 0) - Group By Operator [GBY_8] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - Group By Operator [GBY_6] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Filter Operator [FIL_4] (rows=1 width=4) - predicate:false - Select Operator [SEL_3] (rows=500 width=4) - TableScan [TS_2] (rows=500 width=10) - default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE + SHUFFLE [RS_18] + Group By Operator [GBY_7] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_6] + Group By Operator [GBY_5] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["count()","count(key)"] + Filter Operator [FIL_29] (rows=166 width=87) + predicate:(key > '2') + TableScan [TS_2] (rows=500 width=87) + default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=69 width=91) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0, _col1 + Group By Operator [GBY_13] (rows=69 width=91) + Output:["_col0","_col1"],keys:_col0, true + Select Operator [SEL_11] (rows=166 width=87) + Output:["_col0"] + Filter Operator [FIL_30] (rows=166 width=87) + predicate:(key > '2') + TableScan [TS_9] (rows=500 width=87) + default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] PREHOOK: query: explain select p_mfgr, b.p_name, p_size from part b @@ -2366,58 +2610,128 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 5 <- Map 4 (SIMPLE_EDGE) +Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_22] - Select Operator [SEL_21] (rows=1 width=223) + File Output Operator [FS_54] + Select Operator [SEL_53] (rows=13 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_20] (rows=1 width=344) - predicate:_col4 is null - Merge Join Operator [MERGEJOIN_27] (rows=26 width=344) - Conds:RS_17._col0, _col1=RS_18._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4"] - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0, _col1 - Select Operator [SEL_13] (rows=8 width=219) - Output:["_col0","_col1"] - Filter Operator [FIL_25] (rows=8 width=223) - predicate:(p_size < 10) - TableScan [TS_11] (rows=26 width=223) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] + Filter Operator [FIL_52] (rows=13 width=243) + predicate:CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END + Merge Join Operator [MERGEJOIN_76] (rows=26 width=243) + Conds:RS_49._col0, _col1=RS_50._col3, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col8"] + <-Reducer 12 [SIMPLE_EDGE] llap + SHUFFLE [RS_50] + PartitionCols:_col3, _col1 + 
Merge Join Operator [MERGEJOIN_75] (rows=2 width=223) + Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_42] + PartitionCols:_col0 + Select Operator [SEL_35] (rows=4 width=223) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_34] (rows=4 width=219) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_33] + PartitionCols:_col0, _col1 + Group By Operator [GBY_32] (rows=4 width=219) + Output:["_col0","_col1"],keys:_col1, _col2 + Merge Join Operator [MERGEJOIN_74] (rows=8 width=219) + Conds:RS_28._col0=RS_29._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_28] + PartitionCols:_col0 + Select Operator [SEL_21] (rows=8 width=219) + Output:["_col0","_col1"] + Filter Operator [FIL_69] (rows=8 width=223) + predicate:(p_size < 10) + TableScan [TS_19] (rows=26 width=223) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] + <-Reducer 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_26] (rows=5 width=98) + Output:["_col0"],keys:KEY._col0 + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] + PartitionCols:_col0 + Group By Operator [GBY_24] (rows=5 width=98) + Output:["_col0"],keys:p_mfgr + TableScan [TS_22] (rows=26 width=98) + default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr"] + <-Reducer 16 [SIMPLE_EDGE] llap + SHUFFLE [RS_43] + PartitionCols:_col0 + Group By Operator [GBY_40] (rows=13 width=121) + Output:["_col0"],keys:KEY._col0 + <-Map 15 [SIMPLE_EDGE] llap + SHUFFLE [RS_39] + PartitionCols:_col0 + Group By Operator [GBY_38] (rows=13 width=121) + Output:["_col0"],keys:p_name + TableScan [TS_36] (rows=26 width=121) + default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] + SHUFFLE [RS_49] PartitionCols:_col0, _col1 - Merge Join Operator [MERGEJOIN_26] (rows=26 width=223) - 
Conds:(Inner),Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_73] (rows=26 width=239) + Conds:RS_46._col1=RS_47._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] + SHUFFLE [RS_46] + PartitionCols:_col1 Select Operator [SEL_1] (rows=26 width=223) Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=26 width=223) default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] - Select Operator [SEL_10] (rows=1 width=8) - Filter Operator [FIL_9] (rows=1 width=8) - predicate:(_col0 = 0) - Group By Operator [GBY_7] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - Group By Operator [GBY_5] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_4] (rows=1 width=223) - Filter Operator [FIL_24] (rows=1 width=223) - predicate:((p_size < 10) and (p_name is null or p_mfgr is null)) - TableScan [TS_2] (rows=26 width=223) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_47] + PartitionCols:_col0 + Group By Operator [GBY_17] (rows=2 width=114) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=2 width=114) + Output:["_col0","_col1","_col2"],aggregations:["count()","count(_col1)"],keys:_col2 + Select Operator [SEL_14] (rows=8 width=219) + Output:["_col2","_col1"] + Merge Join Operator [MERGEJOIN_72] (rows=8 width=219) + Conds:RS_11._col0=RS_12._col0(Inner),Output:["_col1","_col2"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col0 + Select Operator [SEL_4] (rows=8 width=219) + Output:["_col0","_col1"] + Filter Operator [FIL_67] (rows=8 width=223) + predicate:(p_size < 10) + 
TableScan [TS_2] (rows=26 width=223) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col0 + Group By Operator [GBY_9] (rows=5 width=98) + Output:["_col0"],keys:KEY._col0 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_8] + PartitionCols:_col0 + Group By Operator [GBY_7] (rows=5 width=98) + Output:["_col0"],keys:p_mfgr + TableScan [TS_5] (rows=26 width=98) + default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr"] PREHOOK: query: explain select p_name, p_size from @@ -2439,71 +2753,73 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_35] - Select Operator [SEL_34] (rows=1 width=125) + File Output Operator [FS_36] + Select Operator [SEL_35] (rows=13 width=125) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_33] - Select Operator [SEL_32] (rows=1 width=125) + SHUFFLE [RS_34] + Select Operator [SEL_33] (rows=13 width=125) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=1 width=133) - predicate:_col3 is null - Merge Join Operator [MERGEJOIN_41] (rows=26 width=133) - Conds:RS_28.UDFToDouble(_col1)=RS_29._col0(Left Outer),Output:["_col0","_col1","_col3"] + Filter Operator [FIL_32] (rows=13 width=145) + predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) + Merge Join Operator [MERGEJOIN_42] (rows=26 width=145) + Conds:RS_29.UDFToDouble(_col1)=RS_30._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] 
<-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] + SHUFFLE [RS_29] PartitionCols:UDFToDouble(_col1) - Merge Join Operator [MERGEJOIN_40] (rows=26 width=125) - Conds:(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_41] (rows=26 width=141) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_26] Select Operator [SEL_1] (rows=26 width=125) Output:["_col0","_col1"] TableScan [TS_0] (rows=26 width=125) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_size"] <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_26] - Select Operator [SEL_17] (rows=1 width=8) - Filter Operator [FIL_16] (rows=1 width=8) - predicate:(_col0 = 0) - Group By Operator [GBY_14] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_10] (rows=1 width=8) - Filter Operator [FIL_9] (rows=1 width=8) - predicate:_col0 is null - Group By Operator [GBY_7] (rows=1 width=8) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - Group By Operator [GBY_5] (rows=1 width=76) - Output:["_col0"],aggregations:["avg(p_size)"] - Filter Operator [FIL_37] (rows=8 width=4) - predicate:(p_size < 10) - TableScan [TS_2] (rows=26 width=4) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_29] + SHUFFLE [RS_27] + Group By Operator [GBY_12] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["count()","count(_col0)"] + Group By Operator [GBY_7] (rows=1 width=8) + Output:["_col0"],aggregations:["avg(VALUE._col0)"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_6] + Group By Operator [GBY_5] (rows=1 width=76) + Output:["_col0"],aggregations:["avg(p_size)"] + Filter Operator [FIL_38] (rows=8 width=4) + predicate:(p_size < 10) + TableScan [TS_2] (rows=26 width=4) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_30] PartitionCols:_col0 - Group By 
Operator [GBY_23] (rows=1 width=8) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - Group By Operator [GBY_21] (rows=1 width=76) - Output:["_col0"],aggregations:["avg(p_size)"] - Filter Operator [FIL_39] (rows=8 width=4) - predicate:(p_size < 10) - TableScan [TS_18] (rows=26 width=4) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] + Group By Operator [GBY_24] (rows=1 width=12) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=1 width=12) + Output:["_col0","_col1"],keys:_col0, true + Group By Operator [GBY_19] (rows=1 width=8) + Output:["_col0"],aggregations:["avg(VALUE._col0)"] + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=76) + Output:["_col0"],aggregations:["avg(p_size)"] + Filter Operator [FIL_40] (rows=8 width=4) + predicate:(p_size < 10) + TableScan [TS_14] (rows=26 width=4) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] PREHOOK: query: explain select b.p_mfgr, min(p_retailprice) from part b @@ -2528,53 +2844,119 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 10 <- Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 4 <- Reducer 17 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 8 <- Reducer 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 5 llap - File Output Operator [FS_37] - Select Operator [SEL_36] (rows=1 width=106) + File Output Operator [FS_83] + Select Operator [SEL_82] (rows=2 width=106) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_35] - Select Operator [SEL_34] (rows=1 width=106) + SHUFFLE [RS_81] + Select Operator [SEL_80] (rows=2 width=106) Output:["_col0","_col1"] - Filter Operator [FIL_33] (rows=1 width=204) - predicate:_col3 is null - Merge Join Operator [MERGEJOIN_42] (rows=5 width=204) - Conds:RS_30._col0, _col1=RS_31._col0, _col1(Left Outer),Output:["_col0","_col1","_col3"] - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] - PartitionCols:_col0, _col1 - Select Operator [SEL_26] (rows=1 width=106) - Output:["_col0","_col1"] - Filter Operator [FIL_39] (rows=1 width=114) - predicate:((_col2 - _col1) > 600.0) - Group By Operator [GBY_24] (rows=5 width=114) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 - <-Map 9 
[SIMPLE_EDGE] llap - SHUFFLE [RS_23] - PartitionCols:_col0 - Group By Operator [GBY_22] (rows=5 width=114) - Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr - TableScan [TS_20] (rows=26 width=106) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] + Filter Operator [FIL_79] (rows=2 width=126) + predicate:CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END + Merge Join Operator [MERGEJOIN_108] (rows=5 width=126) + Conds:RS_76._col0, _col1=RS_77._col3, _col1(Left Outer),Output:["_col0","_col1","_col3","_col4","_col7"] + <-Reducer 17 [SIMPLE_EDGE] llap + SHUFFLE [RS_77] + PartitionCols:_col3, _col1 + Merge Join Operator [MERGEJOIN_107] (rows=1 width=110) + Conds:RS_69._col0=RS_70._col0(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 16 [SIMPLE_EDGE] llap + SHUFFLE [RS_69] + PartitionCols:_col0 + Select Operator [SEL_57] (rows=1 width=110) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_56] (rows=1 width=106) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 15 [SIMPLE_EDGE] llap + SHUFFLE [RS_55] + PartitionCols:_col0, _col1 + Group By Operator [GBY_54] (rows=1 width=106) + Output:["_col0","_col1"],keys:_col0, _col3 + Merge Join Operator [MERGEJOIN_106] (rows=1 width=106) + Conds:RS_50._col1=RS_51._col0(Inner),Output:["_col0","_col3"] + <-Reducer 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_50] + PartitionCols:_col1 + Select Operator [SEL_38] (rows=1 width=114) + Output:["_col0","_col1"] + Filter Operator [FIL_98] (rows=1 width=114) + predicate:((_col2 - _col1) > 600.0) + Group By Operator [GBY_36] (rows=5 width=114) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_34] (rows=5 
width=114) + Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr + TableScan [TS_32] (rows=26 width=106) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] + <-Reducer 20 [SIMPLE_EDGE] llap + SHUFFLE [RS_51] + PartitionCols:_col0 + Group By Operator [GBY_48] (rows=2 width=8) + Output:["_col0"],keys:KEY._col0 + <-Reducer 19 [SIMPLE_EDGE] llap + SHUFFLE [RS_47] + PartitionCols:_col0 + Group By Operator [GBY_46] (rows=2 width=8) + Output:["_col0"],keys:_col1 + Select Operator [SEL_103] (rows=5 width=8) + Output:["_col1"] + Group By Operator [GBY_43] (rows=5 width=106) + Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 + <-Map 18 [SIMPLE_EDGE] llap + SHUFFLE [RS_42] + PartitionCols:_col0 + Group By Operator [GBY_41] (rows=5 width=106) + Output:["_col0","_col1"],aggregations:["min(p_retailprice)"],keys:p_mfgr + Select Operator [SEL_40] (rows=26 width=106) + Output:["p_mfgr","p_retailprice"] + TableScan [TS_39] (rows=26 width=106) + default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] + <-Reducer 22 [SIMPLE_EDGE] llap + SHUFFLE [RS_70] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=5 width=98) + Output:["_col0"],keys:_col0 + Group By Operator [GBY_62] (rows=5 width=98) + Output:["_col0"],keys:KEY._col0 + <-Map 21 [SIMPLE_EDGE] llap + SHUFFLE [RS_61] + PartitionCols:_col0 + Group By Operator [GBY_60] (rows=5 width=98) + Output:["_col0"],keys:p_mfgr + TableScan [TS_58] (rows=26 width=98) + default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_30] + SHUFFLE [RS_76] PartitionCols:_col0, _col1 - Merge Join Operator [MERGEJOIN_41] (rows=5 width=106) - Conds:(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_105] (rows=5 width=122) + Conds:RS_73._col1=RS_74._col0(Left Outer),Output:["_col0","_col1","_col3","_col4"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_27] + SHUFFLE [RS_73] 
+ PartitionCols:_col1 Group By Operator [GBY_4] (rows=5 width=106) Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] llap @@ -2586,31 +2968,61 @@ Stage-0 Output:["p_mfgr","p_retailprice"] TableScan [TS_0] (rows=26 width=106) default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] - Select Operator [SEL_19] (rows=1 width=8) - Filter Operator [FIL_18] (rows=1 width=8) - predicate:(_col0 = 0) - Group By Operator [GBY_16] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] - Group By Operator [GBY_14] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_12] (rows=1 width=114) - Filter Operator [FIL_11] (rows=1 width=114) - predicate:(((_col2 - _col1) > 600.0) and (_col0 is null or _col1 is null)) - Group By Operator [GBY_10] (rows=5 width=114) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col0 - Group By Operator [GBY_8] (rows=5 width=114) - Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr - Select Operator [SEL_7] (rows=26 width=106) - Output:["p_mfgr","p_retailprice"] - TableScan [TS_6] (rows=26 width=106) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_74] + PartitionCols:_col0 + Group By Operator [GBY_30] (rows=1 width=24) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_28] (rows=1 width=24) + Output:["_col0","_col1","_col2"],aggregations:["count()","count(_col0)"],keys:_col3 + Select Operator [SEL_27] (rows=1 width=106) + Output:["_col3","_col0"] + 
Merge Join Operator [MERGEJOIN_104] (rows=1 width=106) + Conds:RS_24._col1=RS_25._col0(Inner),Output:["_col0","_col3"] + <-Reducer 12 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] + PartitionCols:_col0 + Group By Operator [GBY_22] (rows=2 width=8) + Output:["_col0"],keys:KEY._col0 + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col0 + Group By Operator [GBY_20] (rows=2 width=8) + Output:["_col0"],keys:_col1 + Select Operator [SEL_102] (rows=5 width=8) + Output:["_col1"] + Group By Operator [GBY_17] (rows=5 width=106) + Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=5 width=106) + Output:["_col0","_col1"],aggregations:["min(p_retailprice)"],keys:p_mfgr + Select Operator [SEL_14] (rows=26 width=106) + Output:["p_mfgr","p_retailprice"] + TableScan [TS_13] (rows=26 width=106) + default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_24] + PartitionCols:_col1 + Select Operator [SEL_12] (rows=1 width=114) + Output:["_col0","_col1"] + Filter Operator [FIL_96] (rows=1 width=114) + predicate:((_col2 - _col1) > 600.0) + Group By Operator [GBY_10] (rows=5 width=114) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + PartitionCols:_col0 + Group By Operator [GBY_8] (rows=5 width=114) + Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr + Select Operator [SEL_7] (rows=26 width=106) + Output:["p_mfgr","p_retailprice"] + TableScan [TS_6] (rows=26 width=106) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] PREHOOK: query: explain select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), 
lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from cbo_t1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out index 1a532da..a91fc94 100644 --- a/ql/src/test/results/clientpositive/llap/lineage3.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -166,7 +166,7 @@ where key in (select key+18 from src1) order by key PREHOOK: type: QUERY PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"8b9d63653e36ecf4dd425d3cc3de9199","queryText":"select key, value from src1\nwhere key in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"UDFToDouble(src1.key) is not null","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + 18.0) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"8b9d63653e36ecf4dd425d3cc3de9199","queryText":"select key, value from src1\nwhere key in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 
18.0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} 146 val_146 273 val_273 PREHOOK: query: select * from src1 a @@ -178,15 +178,15 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"UDFToDouble((UDFToInteger(b.ctinyint) + 300)) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = 
a.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} 311 val_311 -Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select key, value from src1 where key not in (select key+18 from src1) order by key PREHOOK: type: QUERY PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + 18.0) is null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(count(*) = 0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from 
src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(not CASE WHEN ((count(*) = 0)) THEN (false) WHEN (i is not null) THEN (true) WHEN (src1.key is null) THEN (null) WHEN ((count((UDFToDouble(src1.key) + 18.0)) < count(*))) THEN (true) ELSE (false) END)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]} PREHOOK: query: select * from src1 a where not exists (select cint from alltypesorc b @@ -196,7 +196,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"53191056e05af9080a30de853e8cea9c","queryText":"select * from src1 a\nwhere not exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(UDFToInteger(b.ctinyint) + 300) is 
null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"53191056e05af9080a30de853e8cea9c","queryText":"select * from src1 a\nwhere not exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = a.key AND a.key = a.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"},{"sources":[5,6,7],"targets":[0,1],"expression":"CASE WHEN ((count(*) = 0)) THEN (true) WHEN (count(*) is null) THEN (true) WHEN (true is not null) THEN (false) WHEN ((count(default.alltypesorc.cint) < count(*))) THEN (false) ELSE (true) END","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":5,"vertexType":"TABLE","vertexId":"default.alltypesorc"},{"id":6,"vertexType":"TABLE","vertexId":"default.src1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"}]} 369 401 val_401 406 val_406 diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index 1a006d8..b132cb6 
100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -33,7 +33,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -41,8 +44,24 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (value > 'val_9') (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -55,28 +74,21 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 6 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: 
boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -84,19 +96,66 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + 
Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -244,7 +303,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -252,19 +314,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -272,23 +331,32 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 
value (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -296,7 +364,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) @@ -309,6 +377,53 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE 
Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 321f1cc..e71add5 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -23,7 +23,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -31,19 +32,16 @@ STAGE 
PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -54,20 +52,16 @@ STAGE PLANS: Filter Operator predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) + Group By Operator + keys: key (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 69 Data size: 6003 Basic 
stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -75,19 +69,32 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -147,7 +154,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### 
Vertices: Map 1 @@ -155,18 +165,15 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 @@ -175,22 +182,35 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 
14774 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -198,19 +218,66 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -278,9 +345,10 @@ STAGE PLANS: Tez #### A masked 
pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -288,19 +356,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: UDFToDouble(p_size) is not null (type: boolean) + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_size (type: int), UDFToDouble(p_size) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3458 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: double) - sort order: + - Map-reduce partition columns: _col2 (type: double) - Statistics: Num rows: 26 Data size: 3458 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: int) + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -321,9 +386,9 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: double) + 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE @@ -386,19 +451,29 @@ STAGE PLANS: mode: 
mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -455,9 +530,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -465,19 +543,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column 
stats: COMPLETE - Filter Operator - predicate: (p_mfgr is not null and p_size is not null) (type: boolean) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -485,15 +560,29 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key 
expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -501,10 +590,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -552,41 +641,89 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Reducer 5 Execution mode: llap Reduce Operator Tree: + Merge 
Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - 
Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -647,8 +784,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -656,18 +795,15 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > '9') and value 
is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 @@ -676,18 +812,35 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash + Select Operator + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: 
Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -695,7 +848,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -711,6 +864,27 @@ STAGE PLANS: Reducer 4 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 
+ Execution mode: llap + Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -718,14 +892,27 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) - mode: hash + mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -809,45 +996,66 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 400 Basic 
stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan alias: li Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: int) + value expressions: _col0 (type: int), _col2 (type: int) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 6 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 
'AIR') (type: boolean) Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: l_orderkey + Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int) + keys: l_orderkey (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE @@ -858,44 +1066,37 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: 
Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Reducer 3 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col3 (type: int) + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -904,32 +1105,32 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col3 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col4 (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By 
Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -979,3 +1180,3705 @@ POSTHOOK: Input: default@lineitem #### A masked pattern was here #### 108570 8571 4297 1798 +PREHOOK: query: --where has multiple conjuction +explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +PREHOOK: type: QUERY +POSTHOOK: query: --where has multiple conjuction +explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), 
p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: 
COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: string), _col5 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2544 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 
_col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column 
stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: --lhs contains non-simple expression +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: --lhs contains non-simple expression +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: 
Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: 
_col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 
_col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +PREHOOK: query: explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col0 * _col5) (type: int) + sort order: + + Map-reduce partition columns: (_col0 * _col5) (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), 
_col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_partkey (type: int) + outputColumnNames: p_type, p_partkey + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_partkey) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col0 * _col5) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + 
Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +PREHOOK: query: --lhs contains non-simple expression, corr +explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: --lhs contains non-simple expression, corr +explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), (_col1 + 100) (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), (_col1 + 100) (type: int) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), (_col1 + 100) (type: int) + 1 _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col0 (type: int) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Group By 
Operator + keys: _col2 (type: string), _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) as c from part as e 
where p_size + 100 IN (select p_partkey from part where p_name = e.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +PREHOOK: query: -- lhs contains udf expression +explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: -- lhs contains udf expression +explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: floor(_col7) (type: bigint) + sort order: + + Map-reduce partition columns: floor(_col7) (type: bigint) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + 
TableScan + alias: part + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_retailprice (type: double) + outputColumnNames: p_type, p_retailprice + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_retailprice) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 floor(_col7) (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: floor(_col1) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column 
stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 
1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure 
blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col5 (type: int), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: string), _col5 
(type: int), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col0 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col5 (type: int), _col5 (type: int) + 1 _col0 (type: string), _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 619 Basic 
stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col0 (type: int) + 1 _col0 (type: int), (_col0 + 121150) (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int), _col1 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: 
llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), (_col0 + 121150) (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), (_col0 + 121150) (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +PREHOOK: query: -- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 
(SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + 
Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_partkey (type: int), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string), _col5 (type: int) + 1 _col1 (type: int), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col1, _col3, _col4 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: int), _col1 (type: string), _col4 (type: int) + outputColumnNames: _col3, _col1, _col4 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col3 (type: 
int), _col1 (type: string), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where p_name IN (select p_name from part p where 
p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: -- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated var refers to outer 
table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string), p_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_brand (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 
6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2548 Basic stats: 
COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique blue firebrick mint +almond antique burnished rose metallic +almond antique burnished rose metallic +almond antique chartreuse khaki white +almond antique chartreuse lavender yellow +almond antique forest lavender goldenrod +almond antique gainsboro frosted violet +almond antique medium spring khaki +almond antique 
metallic orange dim +almond antique misty red olive +almond antique olive coral navajo +almond antique salmon chartreuse burlywood +almond antique sky peru orange +almond antique violet chocolate turquoise +almond antique violet mint lemon +almond antique violet turquoise frosted +almond aquamarine burnished black steel +almond aquamarine dodger light gainsboro +almond aquamarine floral ivory bisque +almond aquamarine midnight light salmon +almond aquamarine pink moccasin thistle +almond aquamarine rose maroon antique +almond aquamarine sandy cyan gainsboro +almond aquamarine yellow dodger mint +almond azure aquamarine papaya violet +almond azure blanched chiffon midnight +PREHOOK: query: -- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string), (p_size + 1) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5954 Basic stats: 
COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col1 + 1) (type: int) + sort order: + + Map-reduce partition columns: (_col1 + 1) (type: int) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (p_size + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col1 (type: int) + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique blue firebrick mint +almond antique burnished rose metallic +almond antique burnished rose metallic +almond antique chartreuse khaki white +almond antique chartreuse lavender yellow +almond antique forest lavender goldenrod +almond antique gainsboro frosted violet +almond antique medium spring khaki +almond antique metallic orange dim +almond antique misty red olive +almond antique olive coral navajo +almond antique salmon chartreuse burlywood +almond antique sky peru orange +almond antique violet chocolate turquoise +almond antique violet mint lemon +almond antique violet turquoise frosted +almond aquamarine burnished black steel +almond aquamarine dodger light gainsboro +almond aquamarine floral ivory bisque +almond aquamarine midnight light salmon +almond aquamarine pink moccasin thistle +almond aquamarine rose maroon antique +almond aquamarine sandy cyan gainsboro +almond aquamarine yellow dodger mint +almond azure aquamarine papaya violet +almond azure blanched chiffon midnight +PREHOOK: query: -- where plus having +explain select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) 
from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: -- where plus having +explain select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE 
+ Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '90') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: '90' (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 +119 3 +128 3 +167 3 +187 3 +193 3 +199 3 +208 3 +273 3 +298 3 +311 3 +316 3 +318 3 +327 3 +35 3 +369 3 +384 3 +396 3 +403 3 +409 3 +417 3 +430 3 +431 3 +438 3 +454 3 +466 3 +480 3 +498 3 +5 3 +70 3 +90 3 +PREHOOK: query: -- where with having, correlated +explain select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: -- where with having, correlated +explain select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select 
count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '90') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: '90' (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + 
sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: sc + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE 
Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col2 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 
Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 +119 3 +128 3 +167 3 +187 3 +193 3 +199 3 
+208 3 +273 3 +298 3 +311 3 +316 3 +318 3 +327 3 +35 3 +369 3 +384 3 +396 3 +403 3 +409 3 +417 3 +430 3 +431 3 +438 3 +454 3 +466 3 +480 3 +498 3 +5 3 +70 3 +90 3 +PREHOOK: query: -- subquery with order by +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand +PREHOOK: type: QUERY +POSTHOOK: query: -- subquery with order by +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + 
TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 8 Data size: 
4952 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from 
part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +PREHOOK: query: --order by with limit +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +PREHOOK: type: QUERY +POSTHOOK: query: --order by with limit +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), 
_col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: 
string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 4 + Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: 4 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +PREHOOK: query: -- union, uncorr +explain select * from src where key IN (select p_name from part UNION ALL select p_brand from part) +PREHOOK: type: QUERY +POSTHOOK: query: -- union, uncorr +explain select * from src where key IN (select p_name from part UNION ALL select p_brand from part) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 3 <- Union 4 (CONTAINS) + Map 6 <- Union 4 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Union 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_brand (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + 
Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Union 4 + Vertex: Union 4 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src where key IN (select p_name from part UNION ALL select p_brand from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src where key IN (select p_name from part UNION ALL select p_brand from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: -- corr, subquery has another subquery in from +explain select p_mfgr, b.p_name, p_size from part b where b.p_name in + (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size +PREHOOK: type: QUERY +POSTHOOK: query: -- corr, subquery has another subquery in from +explain select p_mfgr, b.p_name, p_size from part b where b.p_name 
in + (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_size < 10) (type: boolean) + Statistics: Num rows: 8 Data size: 1784 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_mfgr (type: string), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 1752 Basic 
stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: string) + 1 _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int) + sort order: ++ + Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col2 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) 
+ mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, b.p_name, p_size from part b where b.p_name in + (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, b.p_name, p_size from part b where b.p_name in + (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 +Manufacturer#2 almond aquamarine midnight light salmon 2 +Manufacturer#3 almond antique misty red olive 1 +Manufacturer#4 almond aquamarine yellow dodger mint 7 +Manufacturer#5 almond antique medium spring khaki 6 +Manufacturer#5 almond antique sky peru orange 2 +PREHOOK: query: -- join in subquery, correlated predicate with only one table +explain select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size) +PREHOOK: type: QUERY +POSTHOOK: query: -- join in subquery, correlated predicate with only one table +explain select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size) +POSTHOOK: type: 
QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 5 <- Map 9 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map 
Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col2 (type: int) + outputColumnNames: _col3, _col2 + Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col3 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 
(type: int) + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 +110592 +112398 +121152 +121152 +132666 +144293 +146985 +15103 +155733 +17273 +17927 +191709 +192697 +195606 +33357 +40982 +42669 +45261 +48427 +49671 +65667 +78486 +85768 +86428 +90681 +PREHOOK: query: -- join in subquery, correlated predicate with both inner tables, same outer var +explain select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size) +PREHOOK: type: QUERY +POSTHOOK: query: -- join in subquery, correlated predicate with both inner tables, same outer var +explain select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a 
root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 11 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: 
COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + 
value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int), _col2 (type: int) + 1 _col0 (type: string), _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 
+ Statistics: Num rows: 18 Data size: 4050 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 18 Data size: 4050 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col6 + Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col6 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int), _col1 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 8 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: 
KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 +110592 +112398 +121152 +121152 +132666 +144293 +146985 +15103 +155733 +17273 +17927 +191709 +192697 +195606 +33357 +40982 +42669 +45261 +48427 +49671 +65667 +78486 +85768 +86428 +90681 +PREHOOK: query: -- join in subquery, correlated predicate with both inner tables, different outer var +explain select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_type=part.p_type) +PREHOOK: type: QUERY +POSTHOOK: query: -- join in subquery, correlated predicate with both inner tables, different outer var +explain select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_type=part.p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### 
+ Edges: + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6058 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 6058 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 26 Data size: 6058 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_size (type: int), p_type (type: string) + outputColumnNames: p_size, p_type + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int), p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + 
LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: string), _col3 (type: int) + 1 _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col6, _col7 + Statistics: Num rows: 7 
Data size: 1603 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col6 (type: string), _col7 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + 
keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 18 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- subquery within from +explain select p_partkey from + (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq +PREHOOK: type: QUERY +POSTHOOK: query: -- subquery within from +explain select p_partkey from + (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 5 <- Map 9 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort 
order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), 
_col2 (type: int) + outputColumnNames: _col3, _col2 + Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col3 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey from + (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select 
p_partkey from + (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 +110592 +112398 +121152 +121152 +132666 +144293 +146985 +15103 +155733 +17273 +17927 +191709 +192697 +195606 +33357 +40982 +42669 +45261 +48427 +49671 +65667 +78486 +85768 +86428 +90681 +PREHOOK: query: create table tempty(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: create table tnull(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tnull +POSTHOOK: query: create table tnull(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tnull +PREHOOK: query: insert into tnull values(NULL) , (NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@tnull +POSTHOOK: query: insert into tnull values(NULL) , (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tnull +POSTHOOK: Lineage: tnull.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- empty inner table, non-null sq key, expected empty result +select * from part where p_size IN (select i from tempty) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@tempty +#### A masked pattern was here #### +POSTHOOK: query: -- empty inner table, non-null sq key, expected empty result +select * from part where p_size IN (select i from tempty) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@tempty +#### A masked pattern was here #### +PREHOOK: query: -- empty inner table, null sq key, expected empty result 
+select * from tnull where i IN (select i from tempty) +PREHOOK: type: QUERY +PREHOOK: Input: default@tempty +PREHOOK: Input: default@tnull +#### A masked pattern was here #### +POSTHOOK: query: -- empty inner table, null sq key, expected empty result +select * from tnull where i IN (select i from tempty) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tempty +POSTHOOK: Input: default@tnull +#### A masked pattern was here #### +PREHOOK: query: -- null inner table, non-null sq key +select * from part where p_size IN (select i from tnull) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@tnull +#### A masked pattern was here #### +POSTHOOK: query: -- null inner table, non-null sq key +select * from part where p_size IN (select i from tnull) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@tnull +#### A masked pattern was here #### +PREHOOK: query: -- null inner table, null sq key +select * from tnull where i IN (select i from tnull) +PREHOOK: type: QUERY +PREHOOK: Input: default@tnull +#### A masked pattern was here #### +POSTHOOK: query: -- null inner table, null sq key +select * from tnull where i IN (select i from tnull) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tnull +#### A masked pattern was here #### +PREHOOK: query: drop table tempty +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tempty +PREHOOK: Output: default@tempty +POSTHOOK: query: drop table tempty +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tempty +POSTHOOK: Output: default@tempty diff --git a/ql/src/test/results/clientpositive/llap/subquery_nested_subquery.q.out b/ql/src/test/results/clientpositive/llap/subquery_nested_subquery.q.out new file mode 100644 index 0000000..332425f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/subquery_nested_subquery.q.out @@ -0,0 +1,38 @@ +PREHOOK: query: select * +from part x +where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name 
= z.p_name)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * +from part x +where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. 
bra +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 3da1acb..47a9d6b 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- non agg, non corr explain select * @@ -27,8 +27,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -50,21 +51,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: false (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(key) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Map 6 @@ -79,11 +78,16 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -95,14 +99,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 
89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -112,18 +116,18 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 500 Data size: 132500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -132,18 +136,28 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: 
Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -151,7 +165,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from src where src.key not in ( select key from src s1 where s1.key > '2') @@ -285,7 +299,6 @@ POSTHOOK: Input: default@src 199 val_199 199 val_199 2 val_2 -Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- non agg, corr explain select p_mfgr, b.p_name, p_size @@ -315,11 +328,18 @@ STAGE PLANS: Tez #### A 
masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE) + Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 18 <- Map 17 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -332,12 +352,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + value expressions: _col0 (type: string), _col2 (type: int) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 10 Map Operator Tree: TableScan alias: part @@ -351,7 +373,41 @@ STAGE PLANS: value expressions: p_name (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 15 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: 
string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 17 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: part @@ -365,49 +421,24 @@ STAGE PLANS: value expressions: p_name (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Reducer 3 + Map 8 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 26 Data size: 8944 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col4 is null (type: boolean) - Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + LLAP IO: no inputs + Reducer 11 Execution mode: llap Reduce Operator Tree: Select Operator @@ -436,36 +467,143 @@ STAGE PLANS: isPivotResult: true Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) - Statistics: Num rows: 1 Data size: 491 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (rank_window_0 <= 2) (type: boolean) + Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 491 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output 
Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 6 + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 13 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col3 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: boolean) + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 18 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: 
string) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col4 (type: bigint), _col5 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 + Statistics: Num rows: 26 Data size: 6318 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 13 Data size: 3159 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator @@ -497,14 +635,70 @@ STAGE PLANS: predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: 
bigint), _col2 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -512,7 +706,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select p_mfgr, b.p_name, p_size from part b where b.p_name not in @@ -551,7 +744,7 @@ Manufacturer#4 almond azure aquamarine papaya violet 12 Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- agg, non corr explain select p_name, p_size @@ -581,8 +774,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Reducer 10 <- Reducer 9 
(SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) @@ -630,6 +824,20 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: llap LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -639,14 +847,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3666 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col1) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col1) (type: double) - Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 26 Data size: 3666 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -656,18 +864,18 @@ STAGE PLANS: keys: 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) - outputColumnNames: _col0, _col1, _col3 - 
Statistics: Num rows: 26 Data size: 3458 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 26 Data size: 3770 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 13 Data size: 1885 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -724,24 +932,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -794,11 +993,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: double), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -806,7 +1010,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[50][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[51][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select p_name, p_size from part where part.p_size not in @@ -853,7 +1057,6 @@ almond aquamarine sandy cyan gainsboro 18 almond aquamarine yellow dodger mint 7 
almond azure aquamarine papaya violet 12 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- agg, corr explain select p_mfgr, p_name, p_size @@ -881,11 +1084,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE) + Reducer 14 <- Reducer 13 (SIMPLE_EDGE) + Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) + Reducer 17 <- Map 16 (SIMPLE_EDGE) + Reducer 19 <- Map 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -900,12 +1109,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + value expressions: _col0 (type: string), _col2 (type: int) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 10 Map Operator Tree: TableScan alias: part @@ -918,7 +1129,41 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: llap LLAP IO: no inputs - Map 8 + Map 16 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE 
Column stats: COMPLETE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 18 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: part @@ -931,7 +1176,92 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Execution mode: llap LLAP IO: no inputs - Reducer 10 + Map 8 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col2, _col5 + Statistics: Num rows: 26 
Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col5 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 2) (type: boolean) + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 13 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -940,32 +1270,94 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) + Group By Operator + keys: _col1 (type: int), _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 2 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 106 Basic stats: 
COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reducer 15 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col3 (type: int) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: boolean) + Reducer 17 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 19 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Statistics: Num 
rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col4 (type: bigint), _col5 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -974,19 +1366,19 @@ STAGE PLANS: Left Outer Join0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col1 (type: string), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 + Statistics: Num rows: 26 Data size: 6318 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col4 is null (type: boolean) - Statistics: Num rows: 1 Data size: 227 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 13 Data size: 3159 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2899 
Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1025,113 +1417,81 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Reducer 6 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + 
+ Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 is null or _col0 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 7 + Group By Operator + aggregations: count(), count(_col1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - 
Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col2, _col5 - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - - Stage: Stage-0 - Fetch Operator + Statistics: Num rows: 5 Data size: 490 
Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select p_mfgr, p_name, p_size from part b where b.p_size not in (select min(p_size) @@ -1172,21 +1532,21 @@ Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#5 almond azure blanched chiffon midnight 23 Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- non agg, non corr, Group By in Parent Query -select li.l_partkey, count(*) -from lineitem li -where li.l_linenumber = 1 and - li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') group by li.l_partkey PREHOOK: type: QUERY PREHOOK: Input: default@lineitem #### A masked pattern was here #### POSTHOOK: query: -- non agg, non corr, Group By in Parent Query -select li.l_partkey, count(*) -from lineitem li -where li.l_linenumber = 1 and - li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') group by li.l_partkey POSTHOOK: type: QUERY POSTHOOK: Input: default@lineitem @@ -1207,7 +1567,7 @@ POSTHOOK: 
Input: default@lineitem 139636 1 175839 1 182052 1 -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: -- alternate not in syntax select * from src @@ -1371,7 +1731,7 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_v POSTHOOK: Output: database:default POSTHOOK: Output: default@T2_v -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) @@ -1390,8 +1750,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1422,19 +1783,21 @@ STAGE PLANS: insideView TRUE Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (true) ELSE (key is null) END) (type: boolean) - Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < '11') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count() + 
aggregations: count(), count(_col0) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Map 6 @@ -1445,17 +1808,22 @@ STAGE PLANS: insideView TRUE Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (null) ELSE ((key < '11')) END) (type: boolean) - Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < '11') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 15272 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 15272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 31208 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 
(type: string), _col1 (type: boolean) + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1467,13 +1835,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 17098 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 17098 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1483,18 +1852,18 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 199 Data size: 53929 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col2 is null (type: boolean) - Statistics: Num rows: 1 Data size: 271 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 83 Data size: 8881 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 87 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1503,18 +1872,28 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -1522,7 +1901,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a 
cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) PREHOOK: type: QUERY @@ -1537,3 +1916,7252 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_v POSTHOOK: Input: default@t2_v #### A masked pattern was here #### +PREHOOK: query: --where has multiple conjuction +explain select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +PREHOOK: type: QUERY +POSTHOOK: query: --where has multiple conjuction +explain select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) + Reducer 15 <- Map 14 (SIMPLE_EDGE) + Reducer 17 <- Map 16 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: 
int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 14 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean) + Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string) + outputColumnNames: p_type + Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 16 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: 
boolean) + Statistics: Num rows: 26 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_size (type: int) + outputColumnNames: p_size + Statistics: Num rows: 26 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean) + Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string) + outputColumnNames: p_type + Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column 
stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2544 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, 
_col1, _col2 + Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: int), _col0 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col2 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reducer 13 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col3 (type: int) + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: boolean) + Reducer 15 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 17 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col4 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 
(type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col4 (type: string), _col5 (type: int) + 1 _col1 (type: string), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col14 + Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: CASE WHEN ((_col10 = 0)) THEN (true) WHEN (_col10 is null) THEN (true) WHEN (_col14 is not null) THEN (false) WHEN (_col5 is null) THEN (null) WHEN ((_col11 < _col10)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2544 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string), _col0 (type: string) + mode: 
hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col2) + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: --lhs contains non-simple expression +explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: --lhs contains non-simple expression +explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 + Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN ((_col5 - 1) is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### 
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +Warning: Shuffle Join 
MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 
p_type (type: string), p_partkey (type: int) + outputColumnNames: p_type, p_partkey + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_partkey) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_partkey (type: int) + outputColumnNames: p_type, p_partkey + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_partkey) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col0 * _col5) (type: int) + sort order: + + Map-reduce partition 
columns: (_col0 * _col5) (type: int) + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 (_col0 * _col5) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 + Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN ((_col0 * _col5) is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: 
boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car 
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +PREHOOK: query: --lhs contains non-simple expression, corr +explain select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: --lhs contains non-simple expression, corr +explain select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) + Reducer 15 <- Map 14 (SIMPLE_EDGE) + Reducer 17 <- Map 16 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), 
Reducer 9 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 14 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: 
llap + LLAP IO: no inputs + Map 16 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: 
string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reducer 13 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 (_col0 + 100) (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col3 (type: int) + Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: boolean) + Reducer 15 + 
Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 17 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col0 + 100) (type: int) + sort order: + + Map-reduce partition columns: (_col0 + 100) (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 26 Data size: 3666 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 26 Data size: 3666 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: bigint), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col3 (type: int) + outputColumnNames: _col1, _col3, _col4, _col7 + Statistics: Num rows: 26 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: CASE 
WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN ((_col1 + 100) is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col0 (type: int) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 822 Basic 
stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 822 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 822 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 822 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product +PREHOOK: query: -- lhs contains udf expression +explain select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: -- lhs contains udf expression +explain select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: 
COMPLETE + Select Operator + expressions: p_type (type: string), p_retailprice (type: double) + outputColumnNames: p_type, p_retailprice + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_retailprice) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_retailprice (type: double) + outputColumnNames: p_type, p_retailprice + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_retailprice) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: floor(_col7) (type: bigint) + sort order: + + Map-reduce partition columns: floor(_col7) (type: bigint) + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 floor(_col7) (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 + Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN (floor(_col7) is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: 
min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: floor(_col1) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: floor(_col1) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: boolean) + sort order: ++ + Map-reduce 
partition columns: _col0 (type: bigint), _col1 (type: boolean) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +PREHOOK: query: explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 
(SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: int), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col5 (type: int), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 13 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 15 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col0 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + 
Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col0 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col0 (type: int) + 1 _col0 (type: int), (_col0 + 121150) (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col1 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + 
Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: boolean) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string), _col2 (type: int), _col1 (type: int) + sort order: +++ + Map-reduce partition columns: _col4 (type: string), _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: boolean) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), (_col0 + 121150) (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), (_col0 + 121150) (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col5 (type: int), _col5 (type: int) + 1 _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col5 (type: int), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: string), _col5 (type: int), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: bigint), _col12 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string), _col5 (type: int), _col5 (type: int) + 1 _col4 (type: string), _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col16 + Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: CASE WHEN ((_col11 = 0)) THEN (true) WHEN (_col11 is null) THEN (true) WHEN (_col16 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col12 < _col11)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, 
_col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col0 (type: int) + 1 _col0 (type: int), (_col0 + 121150) (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: int), _col1 (type: string) + outputColumnNames: _col3, _col1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + keys: _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 
Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: int) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), (_col0 + 121150) (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), (_col0 + 121150) (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +49671 almond antique gainsboro frosted violet 
Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +195606 almond aquamarine sandy cyan gainsboro 
Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +PREHOOK: query: -- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: 
string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 13 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_partkey (type: int), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 15 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: p + 
Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_partkey (type: int), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no 
inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col1, _col3, _col4 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string), _col3 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: boolean) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 133 Basic 
stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col4 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col4 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: boolean) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int), _col5 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col5 (type: int) + 
Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: bigint), _col12 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string), _col5 (type: int) + 1 _col1 (type: int), _col4 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col16 + Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: CASE WHEN ((_col11 = 0)) THEN (true) WHEN (_col11 is null) THEN (true) WHEN (_col16 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col12 < _col11)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col1 
(type: int) + outputColumnNames: _col1, _col3, _col4 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: int), _col4 (type: int), _col1 (type: string) + outputColumnNames: _col3, _col4, _col1 + Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + keys: _col3 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: 
int), _col1 (type: int) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +PREHOOK: query: -- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type from part where part.p_brand = fpart.brand) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type from part where part.p_brand = fpart.brand) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 8242 
Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string), p_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 13 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 15 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_brand (type: string), p_type (type: 
string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_brand (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + 
keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col3 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: boolean) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + 
outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Statistics: Num rows: 26 Data size: 8658 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 26 Data size: 8658 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col4 (type: bigint), _col5 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: string) + 1 _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5, _col8 + Statistics: Num rows: 26 Data size: 6370 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col1 
is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 13 Data size: 3185 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column 
stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type from part where part.p_brand = fpart.brand) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type from part where part.p_brand = fpart.brand) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +PREHOOK: query: -- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string), (p_size + 1) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 13 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (p_size + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 15 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col1 + 1) (type: int) + sort order: + + Map-reduce partition columns: (_col1 + 1) (type: int) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (p_size + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: 
Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col1 + 1) (type: int) + sort order: + + Map-reduce partition columns: (_col1 + 1) (type: int) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1008 Basic stats: 
COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: boolean) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col3 (type: string), _col1 (type: int) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: boolean) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: 
int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Statistics: Num rows: 26 Data size: 6370 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 6370 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col4 (type: bigint), _col5 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int) + 1 _col3 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col4, _col5, _col8 + Statistics: Num rows: 26 Data size: 6370 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 13 Data size: 3185 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column 
stats: COMPLETE + Select Operator + expressions: _col2 (type: int), _col0 (type: string) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + keys: _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where 
(part.p_size+1) = fpart.size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: -- where plus having +explain select key, count(*) from src where value NOT IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: -- where plus having +explain select key, count(*) from src where value NOT IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 
89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '90') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: '90' (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(value) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + 
outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 214 Data size: 20330 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 214 Data size: 20330 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator 
Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 214 Data size: 20330 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 20330 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: 
boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select key, count(*) from src where value NOT IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from src where value NOT IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: -- where with having, correlated +explain select key, count(*) from src where value NOT IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: -- where with having, correlated +explain select key, count(*) from src where value NOT IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE) + Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 18 <- Map 17 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 20 <- Map 19 (SIMPLE_EDGE) + Reducer 21 <- Reducer 20 (SIMPLE_EDGE) + Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 
(SIMPLE_EDGE) + Reducer 5 <- Reducer 21 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 11 + Map Operator Tree: + TableScan + alias: sc + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 15 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 17 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 19 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '90') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: '90' (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: sc + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 
(type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 13 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col3 (type: string) + Statistics: Num rows: 250 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: boolean) + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 17835 Basic stats: 
COMPLETE Column stats: COMPLETE + Reducer 18 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: bigint), _col4 (type: bigint) + Reducer 20 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + 
sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 21 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 4 + Execution mode: llap + 
Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 205 Data size: 21115 Basic 
stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from src where value NOT IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from src where value NOT IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: -- subquery with order by +explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand +PREHOOK: type: QUERY +POSTHOOK: query: -- subquery with order by +explain select * from part where (p_size-1) 
NOT IN (select min(p_size) from part group by p_type) order by p_brand +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + 
mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 + Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN ((_col5 - 1) is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 
(type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic 
stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE 
BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product 
+PREHOOK: query: --order by with limit +explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +PREHOOK: type: QUERY +POSTHOOK: query: --order by with limit +explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + 
Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num 
rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 + Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN ((_col5 - 1) is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + 
Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 4 + Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE 
+ Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: 4 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part 
group by p_type) order by p_brand limit 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: -- union, uncorr +explain select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part) +PREHOOK: type: QUERY +POSTHOOK: query: -- union, uncorr +explain select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 11 <- Union 9 (CONTAINS) + Map 4 <- Union 5 (CONTAINS) + Map 7 <- Union 5 (CONTAINS) + Map 8 <- Union 9 (CONTAINS) + Reducer 10 <- Union 9 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 6 <- Union 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select 
Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 11 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_brand (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + 
Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_brand (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: 
Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), 
count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Union 5 + Vertex: Union 5 + Union 9 + Vertex: Union 9 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 
val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 
val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +44 val_44 +443 val_443 +444 
val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +458 val_458 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 +478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +67 val_67 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: explain select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand) +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 
(SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) + Reducer 15 <- Map 14 (SIMPLE_EDGE) + Reducer 17 <- Map 16 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_brand (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_brand (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 14 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 + 
Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 16 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_brand (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition 
columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 13 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reducer 13 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble((_col0 + 100)) (type: double) + outputColumnNames: _col1, 
_col2, _col3 + Statistics: Num rows: 13 Data size: 1300 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col3 (type: int) + Statistics: Num rows: 13 Data size: 1300 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: boolean) + Reducer 15 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 17 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble((_col0 + 100)) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble((_col0 + 100)) (type: double) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + 
value expressions: _col3 (type: bigint), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col3 (type: int) + outputColumnNames: _col1, _col3, _col4, _col7 + Statistics: Num rows: 26 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN ((_col1 + 100) is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num 
rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) as c from part as e where 
p_size + 100 not in ( select p_type from part where p_brand = e.p_brand) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +26 +PREHOOK: query: --nullability tests +CREATE TABLE t1 (c1 INT, c2 CHAR(100)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: --nullability tests +CREATE TABLE t1 (c1 INT, c2 CHAR(100)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij') +PREHOOK: type: QUERY +PREHOOK: Output: default@t1 +POSTHOOK: query: INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.c1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t1.c2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: CREATE TABLE t2 (c1 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: CREATE TABLE t2 (c1 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: INSERT INTO t2 VALUES (null), (2), (100) +PREHOOK: type: QUERY +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT INTO t2 VALUES (null), (2), (100) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.c1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, 
$hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: -- uncorr +explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2) +PREHOOK: type: QUERY +POSTHOOK: query: -- uncorr +explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: c1 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(c1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 381 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 4 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false 
+ Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: -- corr +explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2 where t1.c2=t2.c1) +PREHOOK: type: QUERY +POSTHOOK: query: -- corr +explain SELECT 
c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2 where t1.c2=t2.c1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: char(100)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: char(100)) + sort order: + + Map-reduce partition columns: _col1 (type: char(100)) + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 13 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: c2 (type: char(100)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(100)) + sort order: + + Map-reduce partition columns: _col0 (type: char(100)) + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: 
no inputs + Map 15 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: c2 (type: char(100)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(100)) + sort order: + + Map-reduce partition columns: _col0 (type: char(100)) + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 6 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: char(100)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: char(100)) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: char(100)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: char(100)), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(100)), _col2 (type: boolean) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + 
condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int), _col1 (type: char(100)) + sort order: ++ + Map-reduce partition columns: _col3 (type: int), _col1 (type: char(100)) + Statistics: Num rows: 2 Data size: 171 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(100)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: char(100)) + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: char(100)) + 1 _col0 (type: char(100)) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: char(100)) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) + 
Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: char(100)) + 1 _col3 (type: int), _col1 (type: char(100)) + outputColumnNames: _col0, _col3, _col4, _col7 + Statistics: Num rows: 4 Data size: 378 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 2 Data size: 189 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 189 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 189 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col1 (type: char(100)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(100)) + sort order: + + Map-reduce partition columns: _col0 (type: char(100)) + 
Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: char(100)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(100)) + sort order: + + Map-reduce partition columns: _col0 (type: char(100)) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(100)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: char(100)) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +NULL +1 +PREHOOK: query: DROP TABLE t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: DROP TABLE t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: 
default@t1 +PREHOOK: query: DROP TABLE t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: DROP TABLE t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: -- corr, nullability, should not produce any result +create table t1(a int, b int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: -- corr, nullability, should not produce any result +create table t1(a int, b int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: insert into t1 values(1,0), (1,0),(1,0) +PREHOOK: type: QUERY +PREHOOK: Output: default@t1 +POSTHOOK: query: insert into t1 values(1,0), (1,0),(1,0) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t1.b EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: create table t2(a int, b int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2(a int, b int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into t2 values(2,1), (3,1), (NULL,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@t2 +POSTHOOK: query: insert into t2 values(2,1), (3,1), (NULL,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t2.b EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain select t1.a from t1 where t1.b NOT IN (select 
t2.a from t2 where t2.b=t1.a) +PREHOOK: type: QUERY +POSTHOOK: query: explain select t1.a from t1 where t1.b NOT IN (select t2.a from t2 where t2.b=t1.a) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 13 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: 
NONE + Execution mode: llap + LLAP IO: no inputs + Map 15 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: b (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: boolean) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: 
int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col3 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join 
Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col0 (type: int) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1.a from t1 where t1.b NOT IN (select t2.a from t2 where t2.b=t1.a) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1.a from t1 where t1.b NOT IN (select t2.a from t2 where t2.b=t1.a) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: drop table t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: drop table t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: drop table t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: drop table t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: 
Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: -- coor, nullability, should produce result +create table t7(i int, j int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t7 +POSTHOOK: query: -- coor, nullability, should produce result +create table t7(i int, j int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t7 +PREHOOK: query: insert into t7 values(null, 5), (4, 15) +PREHOOK: type: QUERY +PREHOOK: Output: default@t7 +POSTHOOK: query: insert into t7 values(null, 5), (4, 15) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t7 +POSTHOOK: Lineage: t7.i EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t7.j EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: create table fixOb(i int, j int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fixOb +POSTHOOK: query: create table fixOb(i int, j int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fixOb +PREHOOK: query: insert into fixOb values(-1, 5), (-1, 15) +PREHOOK: type: QUERY +PREHOOK: Output: default@fixob +POSTHOOK: query: insert into fixOb values(-1, 5), (-1, 15) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@fixob +POSTHOOK: Lineage: fixob.i EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: fixob.j EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: 
+ Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: fixob + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 13 + Map Operator Tree: + TableScan + alias: fixob + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: j (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 15 + Map Operator Tree: + TableScan + alias: fixob + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group 
By Operator + keys: j (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t7 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: fixob + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: j (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: t7 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + 
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 14 + 
Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col1 (type: int) + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: bigint), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int), _col1 (type: int) + 1 _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col5 is not null) THEN (false) WHEN 
(_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col0 (type: int) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j) +PREHOOK: type: QUERY +PREHOOK: Input: default@fixob +PREHOOK: Input: default@t7 +#### A masked pattern was here #### +POSTHOOK: query: select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fixob +POSTHOOK: Input: default@t7 +#### A masked pattern was here #### +-1 15 +PREHOOK: query: drop table t7 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t7 +PREHOOK: Output: default@t7 +POSTHOOK: query: drop table t7 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t7 +POSTHOOK: Output: default@t7 +PREHOOK: query: drop table fixOb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@fixob +PREHOOK: Output: default@fixob +POSTHOOK: query: drop table fixOb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@fixob +POSTHOOK: Output: default@fixob +PREHOOK: query: create table t(i int, j int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t(i int, j int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t values(1,2), (4,5), (7, NULL) 
+PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values(1,2), (4,5), (7, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.j EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: -- case with empty inner result (t1.j=t.j=NULL) and null subquery key(t.j = NULL) +explain select t.i from t where t.j NOT IN (select t1.i from t t1 where t1.j=t.j) +PREHOOK: type: QUERY +POSTHOOK: query: -- case with empty inner result (t1.j=t.j=NULL) and null subquery key(t.j = NULL) +explain select t.i from t where t.j NOT IN (select t1.i from t t1 where t1.j=t.j) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: 
_col1 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 13 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: j (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 15 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: j (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE 
Column stats: NONE + Group By Operator + keys: j (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 3 
Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: bigint), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int), _col1 (type: int) + 1 _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col5 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col0 (type: int) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE 
Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t.i from t where t.j NOT IN (select t1.i from t t1 where t1.j=t.j) +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select t.i from t where t.j NOT IN (select t1.i from t t1 where t1.j=t.j) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +7 +1 +4 +PREHOOK: query: -- 
case with empty inner result (t1.j=t.j=NULL) and non-null subquery key(t.i is never null) +explain select t.i from t where t.i NOT IN (select t1.i from t t1 where t1.j=t.j) +PREHOOK: type: QUERY +POSTHOOK: query: -- case with empty inner result (t1.j=t.j=NULL) and non-null subquery key(t.i is never null) +explain select t.i from t where t.i NOT IN (select t1.i from t t1 where t1.j=t.j) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 13 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: j (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 10 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 15 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: i (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: j (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 9 + 
Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 
(type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reducer 16 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join 
Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col5 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col0 (type: int) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t.i from t where t.i NOT IN (select t1.i from t t1 where t1.j=t.j) +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select t.i from t where t.i NOT IN (select t1.i from t t1 where t1.j=t.j) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +7 +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: -- case with non-empty inner result and null subquery key(t.j is null) +explain select t.i from t where t.j NOT IN (select t1.i from t t1 ) +PREHOOK: type: QUERY +POSTHOOK: query: -- case with non-empty inner result and null subquery key(t.j is null) +explain select t.i from t where t.j NOT IN (select t1.i from t t1 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: i + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(i) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 67 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select t.i from t where t.j NOT IN (select t1.i from t t1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select t.i from t where t.j NOT IN (select t1.i from t t1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +1 +4 +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: -- case with non-empty inner result and non-null subquery key(t.i is never null) +explain select t.i from t where t.i NOT IN (select t1.i from t t1 
) +PREHOOK: type: QUERY +POSTHOOK: query: -- case with non-empty inner result and non-null subquery key(t.i is never null) +explain select t.i from t where t.i NOT IN (select t1.i from t t1 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: i + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(i) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + 
outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 67 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col3 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column 
stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select t.i from t where t.i NOT IN (select t1.i from t t1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select t.i from t where t.i NOT IN (select t1.i from t t1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +PREHOOK: query: drop table t1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table t1 +POSTHOOK: type: DROPTABLE diff --git 
a/ql/src/test/results/clientpositive/llap/subquery_shared_alias.q.out b/ql/src/test/results/clientpositive/llap/subquery_shared_alias.q.out new file mode 100644 index 0000000..0507ab7 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/subquery_shared_alias.q.out @@ -0,0 +1,23 @@ +PREHOOK: query: select * +from src +where src.key in (select key from src where key > '9') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src +where src.key in (select key from src where key > '9') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 diff --git a/ql/src/test/results/clientpositive/llap/subquery_views.q.out b/ql/src/test/results/clientpositive/llap/subquery_views.q.out index 35e80ae..440e076 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_views.q.out @@ -111,8 +111,6 @@ where `b`.`key` not in from `default`.`src` `a` where `b`.`value` = `a`.`value` and `a`.`key` = `b`.`key` and `a`.`value` > 'val_11' ), tableType:VIRTUAL_VIEW) -Warning: Shuffle Join MERGEJOIN[67][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[69][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product PREHOOK: query: explain select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') @@ -130,13 +128,28 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 12 <- Map 11 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 9 <- Map 8 
(SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) + Reducer 15 <- Map 14 (SIMPLE_EDGE) + Reducer 17 <- Map 16 (SIMPLE_EDGE) + Reducer 19 <- Map 18 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) + Reducer 21 <- Reducer 20 (SIMPLE_EDGE) + Reducer 23 <- Map 22 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) + Reducer 24 <- Reducer 23 (SIMPLE_EDGE) + Reducer 26 <- Map 25 (SIMPLE_EDGE) + Reducer 28 <- Map 27 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) + Reducer 29 <- Reducer 28 (SIMPLE_EDGE) + Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 30 <- Reducer 29 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) + Reducer 32 <- Map 31 (SIMPLE_EDGE) + Reducer 34 <- Map 33 (SIMPLE_EDGE) + Reducer 4 <- Reducer 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -154,112 +167,340 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 11 + Map 10 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value > 'val_11') and (key is null or value is null)) (type: 
boolean) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (value > 'val_11') (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 13 + Map 14 Map Operator Tree: TableScan - alias: a + alias: b + properties: + insideView TRUE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 16 + Map Operator Tree: + TableScan + alias: b + properties: + insideView TRUE + Statistics: Num rows: 500 Data size: 43500 
Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 18 + Map Operator Tree: + TableScan + alias: b + properties: + insideView TRUE Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value > 'val_11') and (key < '11')) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < '11') (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string), key (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string) - sort order: +++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 22 Map Operator Tree: 
TableScan alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (value > 'val_11') (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 25 + Map Operator Tree: + TableScan + alias: b + properties: + insideView TRUE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column 
stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 27 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value > 'val_11') and (key < '11')) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (value > 'val_11') (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string), key (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string) - sort order: +++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 8 + Map 31 + Map Operator Tree: + TableScan + alias: b + properties: + insideView TRUE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + 
Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 33 Map Operator Tree: TableScan alias: b properties: insideView TRUE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < '11') (type: boolean) + predicate: (value > 'val_11') (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 10 + Map 8 + Map Operator Tree: + TableScan + alias: b + properties: + insideView TRUE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition 
columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: boolean) + Reducer 13 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col4 (type: string), _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: boolean) + Reducer 15 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 17 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 19 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + 
Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col4 (type: bigint), _col5 (type: bigint) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col4 (type: bigint), _col5 (type: bigint) + Reducer 20 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -267,16 +508,16 @@ STAGE PLANS: Left Outer Join0 to 1 keys: 0 _col0 (type: string), _col1 (type: string), _col0 (type: string) - 1 _col2 (type: string), _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 166 Data size: 28884 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col4 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col4, _col5, _col9 + Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE Filter 
Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col9 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 83 Data size: 8881 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 8881 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -287,38 +528,113 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 12 + Reducer 21 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 2 + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 23 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner 
Join 0 to 1 keys: - 0 - 1 + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col3, _col0 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 24 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 26 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 28 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 29 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), true (type: boolean) + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: boolean) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -327,36 +643,79 @@ STAGE PLANS: Left Outer Join0 to 1 keys: 0 _col0 (type: string), _col1 (type: string), _col0 (type: string) - 1 _col2 (type: string), _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 166 Data size: 43990 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col4 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5, _col9 + Statistics: Num rows: 166 Data size: 32868 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col9 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 83 Data size: 16434 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + Reducer 30 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col4 (type: string), _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: boolean) + Reducer 32 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 34 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -364,35 +723,58 @@ STAGE PLANS: Reducer 6 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col3, _col0 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - 
outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 9 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -400,8 +782,6 @@ STAGE PLANS: 
Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[67][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[69][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product PREHOOK: query: select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index ff658d7..27f32db 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. explain select * @@ -18,57 +18,61 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) +Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_36] - Select Operator [SEL_35] (rows=1 width=178) + File Output Operator [FS_37] + Select Operator [SEL_36] (rows=250 width=178) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] - Select Operator [SEL_20] (rows=1 width=178) + SHUFFLE [RS_23] + Select Operator [SEL_22] (rows=250 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_19] (rows=1 width=265) - predicate:_col3 is null - Map Join Operator [MAPJOIN_28] (rows=1219 width=265) - Conds:MAPJOIN_27._col0=RS_17._col0(Left Outer),Output:["_col0","_col1","_col3"] - <-Map 5 [BROADCAST_EDGE] llap - BROADCAST [RS_17] + Filter Operator [FIL_21] (rows=250 width=198) + predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) + Map Join Operator [MAPJOIN_29] (rows=500 width=198) + Conds:MAPJOIN_28._col0=RS_35._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] + <-Reducer 6 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_35] PartitionCols:_col0 - Select Operator [SEL_12] (rows=500 width=87) - Output:["_col0"] - TableScan [TS_11] (rows=500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map Join Operator [MAPJOIN_27] (rows=500 width=178) - Conds:(Inner),Output:["_col0","_col1"] + Group By Operator [GBY_34] (rows=205 width=91) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_11] (rows=205 width=91) + Output:["_col0","_col1"],keys:_col0, true + Select Operator [SEL_9] (rows=500 width=87) + 
Output:["_col0"] + TableScan [TS_8] (rows=500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map Join Operator [MAPJOIN_28] (rows=500 width=194) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 4 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_34] - Select Operator [SEL_33] (rows=1 width=8) - Filter Operator [FIL_32] (rows=1 width=8) - predicate:(_col0 = 0) - Group By Operator [GBY_31] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - Group By Operator [GBY_5] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_4] (rows=1 width=87) - Filter Operator [FIL_25] (rows=1 width=87) - predicate:key is null - TableScan [TS_2] (rows=500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + BROADCAST [RS_33] + Group By Operator [GBY_32] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"] + <-Map 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_5] + Group By Operator [GBY_4] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["count()","count(key)"] + Select Operator [SEL_3] (rows=500 width=87) + Output:["key"] + TableScan [TS_2] (rows=500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Select Operator [SEL_1] (rows=500 width=178) Output:["_col0","_col1"] TableScan [TS_0] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] 
in task 'Map 1' is a cross product PREHOOK: query: select * from src where not key in diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index a075662..22d1bbe 100644 --- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -31,54 +31,64 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Reducer 4 <- Map 1 (BROADCAST_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan alias: li Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey 
(type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 2 - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: l_orderkey + Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int) + keys: l_orderkey (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE @@ -89,27 +99,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 - Map 
Operator Tree: - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -121,23 +111,46 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Map 1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 3 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 5 Data 
size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -192,65 +205,74 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Reducer 4 <- Map 1 (BROADCAST_EDGE), Map 3 (SIMPLE_EDGE) + Map 7 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Map 4 (BROADCAST_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 2 (BROADCAST_EDGE), Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 200 Basic stats: 
COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan alias: li Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int), 1 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 2 - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 4 Map Operator Tree: 
TableScan alias: lineitem Statistics: Num rows: 100 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: l_orderkey (type: int), 1 (type: int) + expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: lineitem @@ -270,7 +292,41 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 4 + Map 7 + Map Operator Tree: + TableScan + alias: li + Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator 
+ expressions: l_partkey (type: int), l_linenumber (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + input vertices: + 0 Reducer 6 + Statistics: Num rows: 34 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col2 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -282,19 +338,84 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4 + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col4 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col4 (type: int) + Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col3 (type: int) + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column 
stats: COMPLETE + Reducer 8 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col0, _col3 input vertices: - 0 Map 1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: int), _col2 (type: int) + 0 Map 4 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col3 (type: int) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 
4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 76c8404..0fdcf53 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -3052,17 +3052,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 7 <- Union 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 6 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -3095,7 +3095,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: srcpart @@ -3120,7 +3120,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3157,35 +3157,45 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By 
Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: 
Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reducer 8 + Target Vertex: Map 1 + Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3193,34 +3203,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 Union 6 Vertex: Union 6 @@ -3260,17 +3252,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 
(SIMPLE_EDGE), Union 6 (CONTAINS) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 7 <- Union 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 6 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -3303,7 +3295,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: srcpart @@ -3328,7 +3320,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3367,35 +3359,45 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: 
_col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reducer 8 + Target Vertex: Map 1 + Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3403,34 +3405,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce 
Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 Union 6 Vertex: Union 6 @@ -3471,9 +3455,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 9 (CONTAINS) + Reducer 10 <- Union 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 9 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) + Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) #### A masked pattern was here #### @@ -3482,7 +3467,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ds (type: string) @@ -3496,7 +3480,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 10 + Map 11 Map Operator Tree: TableScan alias: srcpart @@ -3520,7 +3504,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 
389248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ds (type: string) @@ -3554,57 +3537,67 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 11 + Reducer 10 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - 
Target Vertex: Map 1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Target Vertex: Map 1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 5 + Target Vertex: Map 5 + Reducer 12 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3623,7 +3616,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ 
-3657,49 +3650,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic 
stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 5 Union 3 Vertex: Union 3 Union 9 @@ -5318,45 +5278,29 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Union 5 (CONTAINS) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 5 <- Map 1 (BROADCAST_EDGE), Union 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Union 5 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -5376,7 +5320,7 @@ STAGE PLANS: value 
expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: srcpart @@ -5396,32 +5340,42 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 2 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 1 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -5432,22 +5386,22 @@ STAGE PLANS: sort order: + 
Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reducer 7 + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -5455,36 +5409,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: 
_col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Union 5 - Vertex: Union 5 + Union 4 + Vertex: Union 4 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/masking_3.q.out b/ql/src/test/results/clientpositive/masking_3.q.out index 1925dce..2d8a79e 100644 --- a/ql/src/test/results/clientpositive/masking_3.q.out +++ b/ql/src/test/results/clientpositive/masking_3.q.out @@ -15,56 +15,141 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from masking_test_subq POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE - Select Operator - expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: double), _col3 (type: double) - sort order: ++ - Map-reduce partition columns: _col2 (type: double), _col3 (type: double) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: double), _col3 (type: double) - 1 _col0 (type: double), _col1 (type: double) + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: 
NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test_subq + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double), _col0 (type: int) + 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -596,10 +681,112 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from masking_test_subq where key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a 
root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test_subq + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + 
TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Map Reduce Map Operator Tree: @@ -607,50 +794,36 @@ STAGE PLANS: alias: masking_test_subq Statistics: Num rows: 500 Data size: 
5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 0) and UDFToDouble(key) is not null) (type: boolean) + predicate: (key > 0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: double), _col3 (type: double) + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col2 (type: double), _col3 (type: double) + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col1 (type: string) TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) - 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: double), _col3 (type: double) - 1 _col0 (type: double), _col1 (type: double) + 0 UDFToDouble(_col0) (type: double), _col0 (type: int) + 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1174,10 +1347,112 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select key from masking_test_subq where key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test_subq + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Map Reduce Map Operator Tree: @@ -1185,50 +1460,35 @@ STAGE PLANS: alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 0) and UDFToDouble(key) is not null) (type: boolean) + predicate: (key > 0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 
_col1 (type: double), _col2 (type: double) + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col1 (type: double), _col2 (type: double) + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col1 (type: double), _col2 (type: double) - 1 _col0 (type: double), _col1 (type: double) + 0 UDFToDouble(_col0) (type: double), _col0 (type: int) + 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0 - Statistics: 
Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1752,65 +2012,157 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select value from masking_test_subq where key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 0) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: double), _col2 (type: double) - sort order: ++ - Map-reduce partition columns: _col1 (type: double), _col2 (type: double) - 
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col1 (type: double), _col2 (type: double) - 1 _col0 (type: double), _col1 (type: double) - outputColumnNames: _col0 + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test_subq + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double), _col0 (type: int) + 1 UDFToDouble(_col0) (type: double), _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2330,57 +2682,145 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from masking_test_subq join srcpart on (masking_test_subq.key = srcpart.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) is not null and key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: double), _col3 (type: double) - sort order: ++ - Map-reduce partition columns: _col2 (type: double), _col3 (type: double) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: 
Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test_subq + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 
(type: double), _col1 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: double), _col3 (type: double) - 1 _col0 (type: double), _col1 (type: double) + 0 UDFToDouble(_col0) (type: double), _col0 (type: int) + 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6576,10 +7016,112 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from default.masking_test_subq where key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test_subq + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE 
+ Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Map Reduce Map Operator Tree: @@ -6587,50 +7129,36 @@ STAGE PLANS: alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 0) and UDFToDouble(key) is not null) (type: boolean) + predicate: (key > 0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: double), _col3 (type: double) + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col2 (type: double), _col3 (type: double) + Map-reduce partition columns: UDFToDouble(_col0) 
(type: double), _col0 (type: int) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col1 (type: string) TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: double), _col3 (type: double) - 1 _col0 (type: double), _col1 (type: double) + 0 UDFToDouble(_col0) (type: double), _col0 (type: int) + 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator 
compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7154,10 +7682,112 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from masking_test_subq where masking_test_subq.key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test_subq + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Map Reduce Map Operator Tree: @@ -7165,50 +7795,36 @@ STAGE PLANS: alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 0) and UDFToDouble(key) is not null) (type: boolean) + predicate: (key > 0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: double), _col3 (type: double) + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col2 (type: double), _col3 (type: double) + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col1 (type: string) TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double), 
UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: double), _col3 (type: double) - 1 _col0 (type: double), _col1 (type: double) + 0 UDFToDouble(_col0) (type: double), _col0 (type: int) + 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/masking_4.q.out b/ql/src/test/results/clientpositive/masking_4.q.out index 7e923e8..0a81c44 100644 --- a/ql/src/test/results/clientpositive/masking_4.q.out +++ 
b/ql/src/test/results/clientpositive/masking_4.q.out @@ -165,56 +165,141 @@ with q1 as ( select * from masking_test where key = '5') select * from masking_test_subq POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: double), _col3 (type: double) - sort order: ++ - Map-reduce partition columns: _col2 (type: double), _col3 (type: double) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator 
+ keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test_subq + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key 
expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: double), _col3 (type: double) - 1 _col0 (type: double), _col1 (type: double) + 0 UDFToDouble(_col0) (type: double), _col0 (type: int) + 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/perf/query45.q.out b/ql/src/test/results/clientpositive/perf/query45.q.out index 7bc137c..e2d0da5 100644 --- a/ql/src/test/results/clientpositive/perf/query45.q.out +++ b/ql/src/test/results/clientpositive/perf/query45.q.out @@ -5,111 +5,117 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 9 <- Reducer 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 5 - File Output Operator [FS_45] - Limit [LIM_44] (rows=100 width=135) + File Output Operator [FS_47] + Limit [LIM_46] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_43] (rows=95833781 width=135) + Select Operator [SEL_45] (rows=95833781 width=135) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_42] - Group By Operator [GBY_40] (rows=95833781 width=135) + SHUFFLE [RS_44] + Group By Operator [GBY_42] (rows=95833781 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_39] + SHUFFLE [RS_41] PartitionCols:_col0, _col1 - Group By Operator [GBY_38] (rows=191667562 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col10)"],keys:_col4, _col3 - Select Operator [SEL_37] (rows=191667562 width=135) - Output:["_col4","_col3","_col10"] - Merge Join Operator [MERGEJOIN_72] (rows=191667562 width=135) - Conds:RS_34._col0=RS_35._col4(Inner),Output:["_col3","_col4","_col10"] + Group By Operator [GBY_40] (rows=191667562 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col11)"],keys:_col4, _col3 + Select Operator [SEL_39] (rows=191667562 width=135) + Output:["_col4","_col3","_col11"] + Merge Join Operator [MERGEJOIN_74] (rows=191667562 width=135) + 
Conds:RS_36._col0=RS_37._col5(Inner),Output:["_col3","_col4","_col11"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_34] + SHUFFLE [RS_36] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_68] (rows=88000001 width=860) - Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col3","_col4"] + Merge Join Operator [MERGEJOIN_70] (rows=88000001 width=860) + Conds:RS_33._col1=RS_34._col0(Inner),Output:["_col0","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_31] + SHUFFLE [RS_33] PartitionCols:_col1 Select Operator [SEL_2] (rows=80000000 width=860) Output:["_col0","_col1"] - Filter Operator [FIL_62] (rows=80000000 width=860) + Filter Operator [FIL_64] (rows=80000000 width=860) predicate:(c_customer_sk is not null and c_current_addr_sk is not null) TableScan [TS_0] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_32] + SHUFFLE [RS_34] PartitionCols:_col0 Select Operator [SEL_5] (rows=40000000 width=1014) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_63] (rows=40000000 width=1014) + Filter Operator [FIL_65] (rows=40000000 width=1014) predicate:ca_address_sk is not null TableScan [TS_3] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_zip"] <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_71] (rows=174243235 width=135) - Conds:RS_27._col0=RS_28._col1(Inner),Output:["_col4","_col5"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_28] + SHUFFLE [RS_37] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_73] (rows=174243235 width=135) + Conds:RS_29._col0=RS_30._col1(Inner),Output:["_col5","_col6"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_70] (rows=158402938 width=135) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE 
[RS_18] + Merge Join Operator [MERGEJOIN_72] (rows=158402938 width=135) + Conds:RS_22._col0=RS_23._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_22] PartitionCols:_col0 - Select Operator [SEL_14] (rows=144002668 width=135) + Select Operator [SEL_18] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_66] (rows=144002668 width=135) + Filter Operator [FIL_68] (rows=144002668 width=135) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null) - TableScan [TS_12] (rows=144002668 width=135) + TableScan [TS_16] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_sales_price"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_19] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_23] PartitionCols:_col0 - Select Operator [SEL_17] (rows=18262 width=1119) + Select Operator [SEL_21] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_67] (rows=18262 width=1119) + Filter Operator [FIL_69] (rows=18262 width=1119) predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) - TableScan [TS_15] (rows=73049 width=1119) + TableScan [TS_19] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_27] + SHUFFLE [RS_29] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_69] (rows=508200 width=1436) - Conds:RS_24._col1=RS_25._col0(Left Semi),Output:["_col0"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Group By Operator [GBY_23] (rows=231000 width=1436) - Output:["_col0"],keys:_col0 - Select Operator [SEL_11] (rows=231000 width=1436) - Output:["_col0"] - Filter Operator [FIL_65] (rows=231000 width=1436) - predicate:((i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) and i_item_id is not null) - TableScan [TS_9] (rows=462000 width=1436) - 
default@item,i2,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + Merge Join Operator [MERGEJOIN_71] (rows=508200 width=1436) + Conds:RS_26._col1=RS_27._col0(Inner),Output:["_col0"] <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_24] + SHUFFLE [RS_26] PartitionCols:_col1 Select Operator [SEL_8] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_64] (rows=462000 width=1436) - predicate:(i_item_sk is not null and i_item_id is not null) + Filter Operator [FIL_66] (rows=462000 width=1436) + predicate:i_item_sk is not null TableScan [TS_6] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Group By Operator [GBY_14] (rows=115500 width=1436) + Output:["_col0"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Group By Operator [GBY_12] (rows=231000 width=1436) + Output:["_col0"],keys:i_item_id + Select Operator [SEL_11] (rows=231000 width=1436) + Output:["i_item_id"] + Filter Operator [FIL_67] (rows=231000 width=1436) + predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + TableScan [TS_9] (rows=462000 width=1436) + default@item,i2,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] diff --git a/ql/src/test/results/clientpositive/perf/query70.q.out b/ql/src/test/results/clientpositive/perf/query70.q.out index 611af74..4df31f1 100644 --- a/ql/src/test/results/clientpositive/perf/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/query70.q.out @@ -5,92 +5,61 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 - File Output Operator [FS_63] - Limit [LIM_62] (rows=100 width=88) + Reducer 7 + File Output Operator [FS_64] + Limit [LIM_63] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_61] (rows=1045432122 width=88) + Select Operator [SEL_62] (rows=1149975358 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_60] - Select Operator [SEL_58] (rows=1045432122 width=88) + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_61] + Select Operator [SEL_59] (rows=1149975358 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_57] (rows=1045432122 width=88) + PTF Operator [PTF_58] (rows=1149975358 width=88) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS FIRST","partition by:":"_col5, CASE WHEN ((_col5 = 2)) THEN (_col0) ELSE (null) END"}] - Select Operator [SEL_56] (rows=1045432122 width=88) + Select Operator [SEL_57] (rows=1149975358 width=88) Output:["_col0","_col1","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_55] + 
<-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col5, CASE WHEN ((_col5 = 2)) THEN (_col0) ELSE (null) END - Select Operator [SEL_54] (rows=1045432122 width=88) + Select Operator [SEL_55] (rows=1149975358 width=88) Output:["_col0","_col1","_col4","_col5"] - Group By Operator [GBY_53] (rows=1045432122 width=88) + Group By Operator [GBY_54] (rows=1149975358 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_52] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_53] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_51] (rows=2090864244 width=88) + Group By Operator [GBY_52] (rows=2299950717 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, '0' - Select Operator [SEL_49] (rows=696954748 width=88) + Select Operator [SEL_50] (rows=766650239 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_92] (rows=696954748 width=88) - Conds:RS_46._col1=RS_47._col0(Inner),Output:["_col2","_col6","_col7"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_88] (rows=633595212 width=88) - Conds:RS_43._col0=RS_44._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_81] (rows=575995635 width=88) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,ss,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_82] (rows=8116 width=1119) - predicate:(d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 
width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_47] + Merge Join Operator [MERGEJOIN_92] (rows=766650239 width=88) + Conds:RS_47._col7=RS_48._col0(Inner),Output:["_col2","_col6","_col7"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_48] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_91] (rows=127775039 width=88) - Conds:RS_39._col2=RS_40._col0(Left Semi),Output:["_col0","_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col2 - Select Operator [SEL_8] (rows=1704 width=1910) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_83] (rows=1704 width=1910) - predicate:(s_store_sk is not null and s_state is not null) - TableScan [TS_6] (rows=1704 width=1910) - default@store,s,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county","s_state"] + Group By Operator [GBY_39] (rows=58079562 width=88) + Output:["_col0"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_38] PartitionCols:_col0 - Group By Operator [GBY_38] (rows=116159124 width=88) + Group By Operator [GBY_37] (rows=116159124 width=88) Output:["_col0"],keys:_col0 Select Operator [SEL_32] (rows=116159124 width=88) Output:["_col0"] @@ -112,21 +81,21 @@ Stage-0 Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col6 Select Operator [SEL_24] (rows=696954748 width=88) Output:["_col6","_col2"] - Merge Join Operator [MERGEJOIN_90] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_91] (rows=696954748 width=88) Conds:RS_21._col1=RS_22._col0(Inner),Output:["_col2","_col6"] - <-Map 16 [SIMPLE_EDGE] + <-Map 17 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 Select Operator [SEL_17] (rows=1704 width=1910) Output:["_col0","_col1"] Filter Operator [FIL_87] (rows=1704 width=1910) - predicate:(s_store_sk is not null and s_state is not null) + predicate:s_store_sk is not null TableScan [TS_15] (rows=1704 width=1910) 
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_89] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_90] (rows=633595212 width=88) Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2"] <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_18] @@ -137,7 +106,7 @@ Stage-0 predicate:(ss_store_sk is not null and ss_sold_date_sk is not null) TableScan [TS_9] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Map 15 [SIMPLE_EDGE] + <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_14] (rows=8116 width=1119) @@ -146,4 +115,41 @@ Stage-0 predicate:(d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) TableScan [TS_12] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_47] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_89] (rows=696954748 width=88) + Conds:RS_44._col1=RS_45._col0(Inner),Output:["_col2","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=1704 width=1910) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_83] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_6] (rows=1704 width=1910) + default@store,s,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county","s_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_88] (rows=633595212 width=88) + Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_81] (rows=575995635 width=88) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not 
null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,ss,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_82] (rows=8116 width=1119) + predicate:(d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/semijoin4.q.out b/ql/src/test/results/clientpositive/semijoin4.q.out index 3c065a9..89e4023 100644 --- a/ql/src/test/results/clientpositive/semijoin4.q.out +++ b/ql/src/test/results/clientpositive/semijoin4.q.out @@ -56,9 +56,10 @@ WHERE (t2.tinyint_col_21) IN ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 + Stage-2 depends on stages: Stage-1, Stage-6 Stage-3 depends on stages: Stage-2 Stage-5 is a root stage + Stage-6 depends on stages: Stage-5 Stage-0 depends on stages: Stage-3 STAGE PLANS: @@ -69,7 +70,7 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((UDFToInteger(tinyint_col_46) = -92) and decimal1309_col_65 is not null and bigint_col_13 is not null) (type: boolean) + predicate: (decimal1309_col_65 is not null and bigint_col_13 is not null and tinyint_col_46 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: bigint_col_13 (type: bigint), smallint_col_24 (type: smallint), tinyint_col_46 (type: tinyint), double_col_60 (type: double), decimal1309_col_65 (type: decimal(13,9)) @@ -85,7 +86,7 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((UDFToInteger(tinyint_col_21) = -92) and 
tinyint_col_18 is not null and decimal2709_col_9 is not null) (type: boolean) + predicate: (tinyint_col_18 is not null and tinyint_col_21 is not null and decimal2709_col_9 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: decimal2709_col_9 (type: decimal(27,9)), tinyint_col_18 (type: tinyint), tinyint_col_21 (type: tinyint) @@ -105,27 +106,23 @@ STAGE PLANS: 1 _col2 (type: tinyint), _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) outputColumnNames: _col1, _col3, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: smallint), _col3 (type: double), _col7 (type: tinyint), UDFToInteger(_col7) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: UDFToInteger(_col7) (type: int) sort order: + - Map-reduce partition columns: _col3 (type: int) + Map-reduce partition columns: UDFToInteger(_col7) (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: double), _col2 (type: tinyint) + value expressions: _col1 (type: smallint), _col3 (type: double), _col7 (type: tinyint) TableScan Reduce Output 
Operator key expressions: _col0 (type: int) @@ -135,11 +132,11 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 UDFToInteger(_col7) (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col1, _col3, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -153,27 +150,27 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: (UDFToShort(_col2) + _col0) (type: smallint), floor(_col1) (type: bigint) + key expressions: (UDFToShort(_col7) + _col1) (type: smallint), floor(_col3) (type: bigint) sort order: +- - Map-reduce partition columns: (UDFToShort(_col2) + _col0) (type: smallint) + Map-reduce partition columns: (UDFToShort(_col7) + _col1) (type: smallint) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: double), _col2 (type: tinyint) + value expressions: _col1 (type: smallint), _col3 (type: double), _col7 (type: tinyint) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: double), VALUE._col2 (type: tinyint) - outputColumnNames: _col0, _col1, _col2 + expressions: VALUE._col1 (type: smallint), VALUE._col3 (type: double), VALUE._col7 (type: tinyint) + outputColumnNames: _col1, _col3, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: smallint, _col1: double, _col2: tinyint + output shape: _col1: smallint, _col3: double, _col7: tinyint type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: (UDFToShort(_col2) + _col0) ASC NULLS FIRST, floor(_col1) DESC NULLS LAST - partition by: (UDFToShort(_col2) + _col0) + order by: 
(UDFToShort(_col7) + _col1) ASC NULLS FIRST, floor(_col3) DESC NULLS LAST + partition by: (UDFToShort(_col7) + _col1) raw input shape: window functions: window function definition @@ -237,21 +234,39 @@ STAGE PLANS: 0 _col0 (type: decimal(19,11)), _col1 (type: timestamp) 1 _col0 (type: decimal(19,11)), _col1 (type: timestamp) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: -92 (type: int) + Group By Operator + keys: -92 (type: int) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git 
a/ql/src/test/results/clientpositive/semijoin5.q.out b/ql/src/test/results/clientpositive/semijoin5.q.out index 63b477c..20d372a 100644 --- a/ql/src/test/results/clientpositive/semijoin5.q.out +++ b/ql/src/test/results/clientpositive/semijoin5.q.out @@ -48,10 +48,14 @@ WHERE (t2.smallint_col_19) IN (SELECT POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-6 + Stage-2 depends on stages: Stage-1, Stage-8 Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 - Stage-6 is a root stage + Stage-9 is a root stage + Stage-10 depends on stages: Stage-9 + Stage-6 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 Stage-0 depends on stages: Stage-4 STAGE PLANS: @@ -62,7 +66,7 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (tinyint_col_3 is not null and bigint_col_7 is not null and decimal2016_col_26 is not null and timestamp_col_9 is not null) (type: boolean) + predicate: (tinyint_col_3 is not null and bigint_col_7 is not null and decimal2016_col_26 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: tinyint_col_3 (type: tinyint), bigint_col_7 (type: bigint), timestamp_col_9 (type: timestamp), double_col_16 (type: double), decimal2016_col_26 (type: decimal(20,16)), smallint_col_50 (type: smallint) @@ -78,7 +82,7 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((UDFToInteger(smallint_col_19) = -92) and tinyint_col_20 is not null and decimal2709_col_9 is not null and tinyint_col_15 is not null) (type: boolean) + predicate: (tinyint_col_20 is not null and decimal2709_col_9 is not null and tinyint_col_15 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE 
Select Operator expressions: decimal2709_col_9 (type: decimal(27,9)), int_col_10 (type: int), tinyint_col_15 (type: tinyint), smallint_col_19 (type: smallint), tinyint_col_20 (type: tinyint) @@ -99,41 +103,37 @@ STAGE PLANS: 1 _col4 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col2) (type: bigint) outputColumnNames: _col2, _col3, _col5, _col7, _col9 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col2 (type: timestamp), _col3 (type: double), _col5 (type: smallint), _col7 (type: int), UDFToInteger(_col9) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: timestamp), _col4 (type: int) + key expressions: _col2 (type: timestamp), UDFToInteger(_col9) (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: timestamp), _col4 (type: int) + Map-reduce partition columns: _col2 (type: timestamp), UDFToInteger(_col9) (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: smallint), _col3 (type: int) + value expressions: _col3 (type: double), _col5 (type: smallint), _col7 (type: int) TableScan Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: int) + key 
expressions: _col1 (type: timestamp), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: int) + Map-reduce partition columns: _col1 (type: timestamp), _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col0 (type: timestamp), _col4 (type: int) - 1 _col0 (type: timestamp), _col1 (type: int) - outputColumnNames: _col1, _col2, _col3 + 0 _col2 (type: timestamp), UDFToInteger(_col9) (type: int) + 1 _col1 (type: timestamp), _col0 (type: int) + outputColumnNames: _col3, _col5, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -147,27 +147,27 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: (_col3 + UDFToInteger(_col2)) (type: int), floor(_col1) (type: bigint) + key expressions: (_col7 + UDFToInteger(_col5)) (type: int), floor(_col3) (type: bigint) sort order: +- - Map-reduce partition columns: (_col3 + UDFToInteger(_col2)) (type: int) + Map-reduce partition columns: (_col7 + UDFToInteger(_col5)) (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: smallint), _col3 (type: int) + value expressions: _col3 (type: double), _col5 (type: smallint), _col7 (type: int) Reduce Operator Tree: Select Operator - expressions: VALUE._col1 (type: double), VALUE._col2 (type: smallint), VALUE._col3 (type: int) - outputColumnNames: _col1, _col2, _col3 + expressions: VALUE._col3 (type: double), VALUE._col5 (type: smallint), VALUE._col7 (type: int) + outputColumnNames: _col3, _col5, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col1: double, _col2: smallint, _col3: int + 
output shape: _col3: double, _col5: smallint, _col7: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: (_col3 + UDFToInteger(_col2)) ASC NULLS FIRST, floor(_col1) DESC NULLS LAST - partition by: (_col3 + UDFToInteger(_col2)) + order by: (_col7 + UDFToInteger(_col5)) ASC NULLS FIRST, floor(_col3) DESC NULLS LAST + partition by: (_col7 + UDFToInteger(_col5)) raw input shape: window functions: window function definition @@ -179,8 +179,8 @@ STAGE PLANS: isPivotResult: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: LEAD_window_0 (type: int), _col1 (type: double), _col2 (type: smallint), _col3 (type: int) - outputColumnNames: LEAD_window_0, _col1, _col2, _col3 + expressions: LEAD_window_0 (type: int), _col3 (type: double), _col5 (type: smallint), _col7 (type: int) + outputColumnNames: LEAD_window_0, _col3, _col5, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -194,27 +194,27 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: (_col3 + UDFToInteger(_col2)) (type: int), floor(_col1) (type: bigint) + key expressions: (_col7 + UDFToInteger(_col5)) (type: int), floor(_col3) (type: bigint) sort order: -- - Map-reduce partition columns: (_col3 + UDFToInteger(_col2)) (type: int) + Map-reduce partition columns: (_col7 + UDFToInteger(_col5)) (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: LEAD_window_0 (type: int), _col1 (type: double), _col2 (type: smallint), _col3 (type: int) + value expressions: LEAD_window_0 (type: int), _col3 (type: double), _col5 (type: smallint), _col7 (type: int) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col2 (type: double), VALUE._col3 (type: smallint), VALUE._col4 (type: int) - outputColumnNames: _col0, _col2, _col3, 
_col4 + expressions: VALUE._col0 (type: int), VALUE._col4 (type: double), VALUE._col6 (type: smallint), VALUE._col8 (type: int) + outputColumnNames: _col0, _col4, _col6, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col2: double, _col3: smallint, _col4: int + output shape: _col0: int, _col4: double, _col6: smallint, _col8: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: (_col4 + UDFToInteger(_col3)) DESC NULLS LAST, floor(_col2) DESC NULLS LAST - partition by: (_col4 + UDFToInteger(_col3)) + order by: (_col8 + UDFToInteger(_col6)) DESC NULLS LAST, floor(_col4) DESC NULLS LAST + partition by: (_col8 + UDFToInteger(_col6)) raw input shape: window functions: window function definition @@ -225,7 +225,7 @@ STAGE PLANS: window frame: PRECEDING(MAX)~FOLLOWING(48) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: COALESCE(498,_col0,524) (type: int), (_col4 + UDFToInteger(_col3)) (type: int), floor(_col2) (type: bigint), COALESCE(sum_window_1,704) (type: bigint) + expressions: COALESCE(498,_col0,524) (type: int), (_col8 + UDFToInteger(_col6)) (type: int), floor(_col4) (type: bigint), COALESCE(sum_window_1,704) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -236,40 +236,149 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-6 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan - alias: tt1 + alias: t1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: decimal2612_col_77 is not null (type: boolean) + predicate: (tinyint_col_3 is not null and 
bigint_col_7 is not null and decimal2016_col_26 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: decimal2612_col_77 (type: decimal(26,12)) - outputColumnNames: _col0 + expressions: tinyint_col_3 (type: tinyint), bigint_col_7 (type: bigint), timestamp_col_9 (type: timestamp), decimal2016_col_26 (type: decimal(20,16)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: decimal(26,12)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(26,12)) + key expressions: _col0 (type: tinyint), _col3 (type: decimal(34,16)), _col1 (type: bigint) + sort order: +++ + Map-reduce partition columns: _col0 (type: tinyint), _col3 (type: decimal(34,16)), _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: timestamp) + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (tinyint_col_20 is not null and decimal2709_col_9 is not null and tinyint_col_15 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: decimal2709_col_9 (type: decimal(27,9)), tinyint_col_15 (type: tinyint), tinyint_col_20 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col1) (type: bigint) + sort order: +++ + Map-reduce partition columns: _col2 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col1) (type: bigint) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + 
Inner Join 0 to 1 + keys: + 0 _col0 (type: tinyint), _col3 (type: decimal(34,16)), _col1 (type: bigint) + 1 _col2 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col1) (type: bigint) + outputColumnNames: _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + keys: _col2 (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: tt2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (decimal1911_col_16 is not null and timestamp_col_18 is not null) (type: boolean) + predicate: decimal1911_col_16 is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp) 
outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator + key expressions: _col1 (type: timestamp) + sort order: + + Map-reduce partition columns: _col1 (type: timestamp) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: decimal(19,11)) + TableScan + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: timestamp) + 1 _col0 (type: timestamp) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: decimal(26,12)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(26,12)) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: timestamp) + TableScan + alias: tt1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: decimal2612_col_77 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: decimal2612_col_77 (type: decimal(26,12)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator key expressions: _col0 (type: decimal(26,12)) sort order: + Map-reduce 
partition columns: _col0 (type: decimal(26,12)) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: timestamp) Reduce Operator Tree: Join Operator condition map: @@ -280,11 +389,11 @@ STAGE PLANS: outputColumnNames: _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: timestamp), -92 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col2 (type: timestamp) + outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: _col0 (type: timestamp), _col1 (type: int) + keys: _col1 (type: timestamp), -92 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -295,6 +404,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out index 567c6d3..a40115c 100644 --- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out +++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out @@ -95,7 +95,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 5 (PARTITION-LEVEL SORT, 4) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 4), Reducer 7 (PARTITION-LEVEL SORT, 4) + Reducer 5 <- Reducer 4 (GROUP, 4) + Reducer 7 <- Map 6 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -104,7 +107,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean) + predicate: (l_linenumber = 1) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) @@ -122,27 +125,42 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicate: (l_shipmode = 'AIR') (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int), 1 (type: int) + expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: 
COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: li + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_linenumber (type: int) + outputColumnNames: l_linenumber + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_linenumber (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: int), 1 (type: int) 1 _col0 (type: int), _col1 (type: int) @@ -159,6 +177,50 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition 
map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out index b58fcbe..c28a218 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out @@ -32,7 +32,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 
(PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -40,8 +43,22 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (value > 'val_9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -52,45 +69,86 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 6 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) + Select Operator + expressions: key 
(type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + 
Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -237,7 +295,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 
(PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -245,46 +306,54 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) @@ -297,6 +366,50 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group 
By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index 21a48ec..36e3e6e 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -22,7 +22,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -30,19 +31,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: 
string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan @@ -51,37 +49,45 @@ STAGE PLANS: Filter Operator predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) + Group By Operator + keys: key (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator 
compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -140,7 +146,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -148,57 +157,113 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data 
size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: 
value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: 
string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -265,9 +330,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Reducer 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -275,19 +341,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(p_size) is not null (type: boolean) + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_size (type: int), UDFToDouble(p_size) (type: double) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: 
UDFToDouble(_col1) (type: double) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: double) - sort order: + - Map-reduce partition columns: _col2 (type: double) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: int) + value expressions: _col0 (type: string), _col1 (type: int) Map 3 Map Operator Tree: TableScan @@ -303,9 +366,9 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: double) + 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE @@ -366,19 +429,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -434,9 +506,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 9 <- Map 8 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -444,41 +519,54 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_mfgr is not null and p_size is not null) (type: boolean) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan alias: part Statistics: 
Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) + TopN Hash Memory Usage: 0.1 + Map 8 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -526,39 +614,84 @@ STAGE PLANS: expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce 
Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col1 (type: int) Reducer 5 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 6 + Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column 
stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Reducer 9 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -618,8 +751,10 @@ STAGE PLANS: 
Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -627,70 +762,118 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + value expressions: _col0 (type: string) + Map 
6 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: key (type: string), value (type: string) + keys: value (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: string) + keys: _col0 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Reducer 7 
+ Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -773,43 +956,62 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not 
null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) - Map 4 + value expressions: _col0 (type: int), _col2 (type: int) + Map 6 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: _col0 + outputColumnNames: l_orderkey Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: int) + keys: l_orderkey (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE @@ -818,41 +1020,35 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: 
l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) - Reducer 3 + value expressions: _col0 (type: int), _col3 (type: int) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -860,10 +1056,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col0, _col3 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -873,18 
+1069,18 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -934,3 +1130,3506 @@ POSTHOOK: Input: default@lineitem #### A masked pattern was here #### 108570 8571 4297 1798 +PREHOOK: query: --where has multiple conjuction +explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +PREHOOK: type: QUERY +POSTHOOK: query: --where has multiple conjuction +explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE + Filter Operator + predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string) + outputColumnNames: p_type + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_type (type: string) + 
mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: string), _col5 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: 
mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: --lhs contains non-simple expression +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: --lhs contains non-simple expression +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: 
Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col1 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor 
Tree: + ListSink + +PREHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +PREHOOK: query: explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 * _col5) (type: int) + sort order: + + Map-reduce partition columns: (_col0 * _col5) (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + 
Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_partkey (type: int) + outputColumnNames: p_type, p_partkey + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_partkey) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col0 * _col5) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 
1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +PREHOOK: query: --lhs contains non-simple expression, corr +explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: --lhs contains non-simple expression, corr +explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), 
Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), (_col1 + 100) (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), (_col1 + 100) (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string) + outputColumnNames: p_name + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 
3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), (_col1 + 100) (type: int) + 1 _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: int) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: 
string), _col1 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +PREHOOK: query: -- lhs contains udf expression +explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: -- lhs contains udf 
expression +explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: floor(_col7) (type: bigint) + sort order: + + Map-reduce partition columns: floor(_col7) (type: bigint) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_retailprice (type: double) + outputColumnNames: p_type, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 
Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 floor(_col7) (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: floor(_col1) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 
furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +PREHOOK: 
type: QUERY +POSTHOOK: query: explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col5 (type: int), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: string), _col5 (type: int), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col2 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_size (type: int) + outputColumnNames: p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col5 (type: int), _col5 (type: int) + 1 _col0 (type: string), _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col0 (type: int) + 1 _col0 (type: int), (_col0 + 121150) (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column 
stats: NONE + Group By Operator + keys: _col1 (type: string), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int), _col1 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), (_col0 + 121150) (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), (_col0 + 121150) (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +PREHOOK: type: QUERY +PREHOOK: Input: 
default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +PREHOOK: query: -- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated query, multiple correlated variables referring to different outer var +explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_size (type: int) + outputColumnNames: p_partkey, p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_partkey (type: int), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + 
Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string), _col5 (type: int) + 1 _col1 (type: int), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col1, _col3, _col4 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col1 (type: string), _col4 (type: int) + outputColumnNames: _col3, _col1, _col4 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col3 (type: int), _col1 (type: string), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO 
PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: -- correlated var refers to outer table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated var refers to outer 
table alias +explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string), p_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_brand (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: p_brand (type: string) + outputColumnNames: p_brand + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + 
Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique blue firebrick mint +almond antique burnished rose metallic +almond antique burnished rose metallic +almond antique chartreuse khaki white +almond antique chartreuse lavender yellow +almond antique forest lavender goldenrod +almond antique gainsboro frosted violet +almond antique medium spring khaki +almond antique metallic orange dim +almond antique misty red olive +almond antique olive coral navajo +almond antique salmon chartreuse burlywood +almond 
antique sky peru orange +almond antique violet chocolate turquoise +almond antique violet mint lemon +almond antique violet turquoise frosted +almond aquamarine burnished black steel +almond aquamarine dodger light gainsboro +almond aquamarine floral ivory bisque +almond aquamarine midnight light salmon +almond aquamarine pink moccasin thistle +almond aquamarine rose maroon antique +almond aquamarine sandy cyan gainsboro +almond aquamarine yellow dodger mint +almond azure aquamarine papaya violet +almond azure blanched chiffon midnight +PREHOOK: query: -- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +PREHOOK: type: QUERY +POSTHOOK: query: -- correlated var refers to outer table alias which is an expression +explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string), (p_size + 1) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce 
partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col1 + 1) (type: int) + sort order: + + Map-reduce partition columns: (_col1 + 1) (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (p_size + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part 
where (part.p_size+1) = fpart.size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique blue firebrick mint +almond antique burnished rose metallic +almond antique burnished rose metallic +almond antique chartreuse khaki white +almond antique chartreuse lavender yellow +almond antique forest lavender goldenrod +almond antique gainsboro frosted violet +almond antique medium spring khaki +almond antique metallic orange dim +almond antique misty red olive +almond antique olive coral navajo +almond antique salmon chartreuse burlywood +almond antique sky peru orange +almond antique violet chocolate turquoise +almond antique violet mint lemon +almond antique violet turquoise frosted +almond aquamarine burnished black steel +almond aquamarine dodger light gainsboro +almond aquamarine floral ivory bisque +almond aquamarine midnight light salmon +almond aquamarine pink moccasin thistle +almond aquamarine rose maroon antique +almond aquamarine sandy cyan gainsboro +almond aquamarine yellow dodger mint +almond azure aquamarine papaya violet +almond azure blanched chiffon midnight +PREHOOK: query: -- where plus having +explain select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: -- where plus having +explain select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = '90') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 
250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: '90' (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 
3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reducer 9 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: 
bigint) + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 +119 3 +128 3 +167 3 +187 3 +193 3 +199 3 +208 3 +273 3 +298 3 +311 3 +316 3 +318 3 +327 3 +35 3 +369 3 +384 3 +396 3 +403 3 +409 3 +417 3 +430 3 +431 3 +438 3 +454 3 +466 3 +480 3 +498 3 +5 3 +70 3 +90 3 +PREHOOK: query: -- where with having, correlated +explain select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: -- where with having, correlated +explain select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 11 <- Map 10 (GROUP, 2) + Reducer 12 <- Reducer 11 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- 
Reducer 6 (GROUP, 2) + Reducer 9 <- Map 8 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 10 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = '90') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: '90' (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: sc + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 
(type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reducer 12 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 62 Data size: 658 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 9 + Reduce Operator Tree: + Group By Operator + 
keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 +119 3 +128 3 +167 3 +187 3 +193 3 +199 3 +208 3 +273 3 +298 3 +311 3 +316 3 +318 3 +327 3 +35 3 +369 3 +384 3 +396 3 +403 3 +409 3 +417 3 +430 3 +431 3 +438 3 +454 3 +466 3 +480 3 +498 3 +5 3 +70 3 +90 3 +PREHOOK: query: -- subquery with order by +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand +PREHOOK: type: QUERY +POSTHOOK: query: -- subquery with order by +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 4 (GROUP, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) +#### A masked pattern was here #### + 
Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: 
_col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +PREHOOK: query: --order by with limit +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +PREHOOK: type: QUERY +POSTHOOK: query: --order by with limit +explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 4 (GROUP, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: 
string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col5 - 1) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + 
Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 4 + Statistics: Num rows: 4 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: 4 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +PREHOOK: type: QUERY +PREHOOK: 
Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +PREHOOK: query: -- union, uncorr +explain select * from src where key IN (select p_name from part UNION ALL select p_brand from part) +PREHOOK: type: QUERY +POSTHOOK: query: -- union, uncorr +explain select * from src where key IN (select p_name from part UNION ALL select p_brand from part) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2), Map 5 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + 
Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_brand (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src where key IN (select p_name from part UNION ALL select p_brand from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src where key IN (select p_name from part UNION ALL select p_brand from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: -- corr, subquery has another subquery in from +explain select p_mfgr, b.p_name, p_size from part b where b.p_name in + (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size +PREHOOK: type: QUERY +POSTHOOK: query: -- corr, subquery has another subquery in from +explain select p_mfgr, b.p_name, p_size from part b where b.p_name in + (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 2) +#### A masked pattern was here #### + 
Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_size < 10) (type: boolean) + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: 
_col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: string) + 1 _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int) + sort order: ++ + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: 
string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, b.p_name, p_size from part b where b.p_name in + (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, b.p_name, p_size 
from part b where b.p_name in + (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 +Manufacturer#2 almond aquamarine midnight light salmon 2 +Manufacturer#3 almond antique misty red olive 1 +Manufacturer#4 almond aquamarine yellow dodger mint 7 +Manufacturer#5 almond antique medium spring khaki 6 +Manufacturer#5 almond antique sky peru orange 2 +PREHOOK: query: -- join in subquery, correlated predicate with only one table +explain select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size) +PREHOOK: type: QUERY +POSTHOOK: query: -- join in subquery, correlated predicate with only one table +explain select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_size (type: int) + outputColumnNames: p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) 
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: 
string), _col2 (type: int) + outputColumnNames: _col3, _col2 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col3 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on 
p.p_type = pp.p_type where pp.p_size = part.p_size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 +110592 +112398 +121152 +121152 +132666 +144293 +146985 +15103 +155733 +17273 +17927 +191709 +192697 +195606 +33357 +40982 +42669 +45261 +48427 +49671 +65667 +78486 +85768 +86428 +90681 +PREHOOK: query: -- join in subquery, correlated predicate with both inner tables, same outer var +explain select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size) +PREHOOK: type: QUERY +POSTHOOK: query: -- join in subquery, correlated predicate with both inner tables, same outer var +explain select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 12 (PARTITION-LEVEL SORT, 2) + Reducer 12 <- Map 11 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 10 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 8 <- Map 11 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 
(type: string), _col2 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 11 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_size (type: int) + outputColumnNames: p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 
3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 12 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int), _col2 (type: int) + 1 _col0 (type: string), _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col6 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col6 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int), _col1 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: 
string), _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 +110592 +112398 +121152 +121152 +132666 +144293 +146985 +15103 +155733 +17273 +17927 +191709 +192697 +195606 +33357 +40982 +42669 +45261 +48427 +49671 +65667 +78486 +85768 +86428 +90681 +PREHOOK: query: -- join in subquery, correlated predicate with both inner tables, different outer var +explain select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_type=part.p_type) +PREHOOK: type: QUERY +POSTHOOK: query: -- join in subquery, correlated predicate with both inner tables, different outer var +explain select p_partkey from part where p_name in + (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and 
p.p_type=part.p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 11 <- Map 10 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 11 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 10 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_size (type: int), p_type (type: string) + outputColumnNames: p_size, p_type + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_size (type: int), p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + 
Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_type (type: string), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 11 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: string), _col3 (type: int) + 1 _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 4118 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4118 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + 
outputColumnNames: _col0, _col6, _col7 + Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col6 (type: string), _col7 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3744 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 30 Data size: 3744 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 
_col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- subquery within from +explain select p_partkey from + (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq +PREHOOK: type: QUERY +POSTHOOK: query: -- subquery within from +explain select p_partkey from + (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + 
Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_size (type: int) + outputColumnNames: p_size + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col2 (type: int) + outputColumnNames: _col3, _col2 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col3 
(type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey from + (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey from + (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part 
+#### A masked pattern was here #### +105685 +110592 +112398 +121152 +121152 +132666 +144293 +146985 +15103 +155733 +17273 +17927 +191709 +192697 +195606 +33357 +40982 +42669 +45261 +48427 +49671 +65667 +78486 +85768 +86428 +90681 +PREHOOK: query: create table tempty(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: create table tnull(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tnull +POSTHOOK: query: create table tnull(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tnull +PREHOOK: query: insert into tnull values(NULL) , (NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@tnull +POSTHOOK: query: insert into tnull values(NULL) , (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tnull +POSTHOOK: Lineage: tnull.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- empty inner table, non-null sq key, expected empty result +select * from part where p_size IN (select i from tempty) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@tempty +#### A masked pattern was here #### +POSTHOOK: query: -- empty inner table, non-null sq key, expected empty result +select * from part where p_size IN (select i from tempty) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@tempty +#### A masked pattern was here #### +PREHOOK: query: -- empty inner table, null sq key, expected empty result +select * from tnull where i IN (select i from tempty) +PREHOOK: type: QUERY +PREHOOK: Input: default@tempty +PREHOOK: Input: default@tnull +#### A masked pattern was here #### +POSTHOOK: query: -- empty inner table, null sq key, expected 
empty result +select * from tnull where i IN (select i from tempty) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tempty +POSTHOOK: Input: default@tnull +#### A masked pattern was here #### +PREHOOK: query: -- null inner table, non-null sq key +select * from part where p_size IN (select i from tnull) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@tnull +#### A masked pattern was here #### +POSTHOOK: query: -- null inner table, non-null sq key +select * from part where p_size IN (select i from tnull) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@tnull +#### A masked pattern was here #### +PREHOOK: query: -- null inner table, null sq key +select * from tnull where i IN (select i from tnull) +PREHOOK: type: QUERY +PREHOOK: Input: default@tnull +#### A masked pattern was here #### +POSTHOOK: query: -- null inner table, null sq key +select * from tnull where i IN (select i from tnull) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tnull +#### A masked pattern was here #### +PREHOOK: query: drop table tempty +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tempty +PREHOOK: Output: default@tempty +POSTHOOK: query: drop table tempty +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tempty +POSTHOOK: Output: default@tempty diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 012c3eb..14544c5 100644 --- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -31,33 +31,71 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 2) + Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 + Map Operator Tree: + TableScan + alias: li + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
((l_linenumber = 1) and l_partkey is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: _col0 + outputColumnNames: l_orderkey Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: int) + keys: l_orderkey (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Execution mode: vectorized Local Work: Map Reduce Local Work - Map 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) +#### A masked 
pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: lineitem @@ -75,7 +113,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Reducer 2 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -85,60 +123,37 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: li - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 2 - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column 
stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -185,43 +200,149 @@ where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 4 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 
Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicate: (l_shipmode = 'AIR') (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int), 1 (type: int) + expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int), 1 (type: int) - 1 _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work + + Stage: Stage-2 + Spark + Edges: + Reducer 8 <- Map 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 2) +#### A masked pattern was here #### + Vertices: Map 3 Map Operator Tree: TableScan + alias: li + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + Local Work: + Map Reduce Local Work + Map 
7 + Map Operator Tree: + TableScan + alias: li + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_partkey (type: int), l_linenumber (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + input vertices: + 0 Reducer 6 + Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 8 + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 6599 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + input vertices: + 0 Map 4 + Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: 
++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE + Reducer 9 + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3629 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -237,7 +358,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Reducer 2 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -247,60 +368,76 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4 + input vertices: + 1 Map 3 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3 + input vertices: + 1 Reducer 9 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: 
int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 + Stage: Stage-4 Spark + Edges: + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan - alias: li + alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int), 1 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 2 - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 
7257 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Execution mode: vectorized Local Work: Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/subq_where_serialization.q.out b/ql/src/test/results/clientpositive/subq_where_serialization.q.out index f689651..5deb9d9 100644 --- a/ql/src/test/results/clientpositive/subq_where_serialization.q.out +++ b/ql/src/test/results/clientpositive/subq_where_serialization.q.out @@ -4,11 +4,9 @@ POSTHOOK: query: explain select src.key from src where src.key in ( select disti POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 , consists of Stage-5, Stage-1 - Stage-5 has a backup stage: Stage-1 - Stage-3 depends on stages: Stage-5 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-1 + Stage-4 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 
@@ -17,8 +15,9 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string) @@ -38,9 +37,9 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) - mode: hash + mode: complete outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -49,93 +48,46 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 - Conditional Operator - - Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $hdt$_0:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: + $hdt$_0:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out index 86f9089..1019e7a 100644 --- a/ql/src/test/results/clientpositive/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/subquery_exists.q.out @@ -25,60 +25,146 @@ where exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (value > 'val_9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce 
partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -222,53 +308,136 @@ where exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map 
Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + 
Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) diff --git a/ql/src/test/results/clientpositive/subquery_exists_having.q.out b/ql/src/test/results/clientpositive/subquery_exists_having.q.out index 8861c82..e54e18f 100644 --- a/ql/src/test/results/clientpositive/subquery_exists_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_exists_having.q.out @@ -22,7 +22,10 @@ having exists POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -32,8 +35,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is 
not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -72,41 +76,132 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: 
mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (value > 'val_9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -175,8 +270,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -194,36 +290,48 @@ 
STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_9') and key is not null) (type: boolean) + predicate: (value > 'val_9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1166 Data size: 12387 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -236,23 +344,84 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 291 Data size: 3091 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 1457 Data size: 15478 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col2 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Mux Operator + Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output 
Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator - Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1457 Data size: 15478 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: _col2 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Mux Operator + Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git 
a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out index 854aa36..e277c59 100644 --- a/ql/src/test/results/clientpositive/subquery_in_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -60,8 +60,9 @@ having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.k POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -94,15 +95,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -119,11 +117,11 @@ STAGE PLANS: key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -169,24 +167,39 @@ STAGE PLANS: 
expressions: _col1 (type: bigint) outputColumnNames: _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch 
Operator @@ -269,8 +282,11 @@ having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-6 is a root stage + Stage-3 depends on stages: Stage-6 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -280,25 +296,22 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + Group By Operator + aggregations: count() + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By 
Operator aggregations: count(VALUE._col0) @@ -306,19 +319,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -332,17 +342,17 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) + key expressions: _col1 (type: string), _col0 (type: bigint) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: bigint) - 1 _col0 (type: 
string), _col1 (type: bigint) + 1 _col1 (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -353,6 +363,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -360,53 +412,108 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string) + 
outputColumnNames: _col2, _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -451,8 +558,9 @@ STAGE PLANS: TableScan alias: b 
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) @@ -495,11 +603,11 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -519,8 +627,9 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(p_size), min(p_size) @@ -550,9 +659,9 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) - mode: hash + mode: complete outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -601,8 +710,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + 
outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) @@ -619,8 +729,9 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(p_size), min(p_size) @@ -644,10 +755,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 40 Data size: 4230 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 3701 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -673,23 +784,28 @@ STAGE PLANS: expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 40 Data size: 4230 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Group By Operator + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 35 Data size: 3701 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: 
string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -765,60 +881,84 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '8') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator 
predicate: (key > '8') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) + Group By Operator + keys: key (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Demux Operator + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -828,7 +968,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -836,16 +976,13 @@ STAGE 
PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -855,7 +992,7 @@ STAGE PLANS: key expressions: _col2 (type: bigint) sort order: + Map-reduce partition columns: _col2 (type: bigint) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator @@ -864,21 +1001,47 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Demux Operator + Statistics: Num rows: 84 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 42 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce @@ -912,24 +1075,17 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + 
Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -966,60 +1122,91 @@ group by key, value having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-5 Stage-5 is a root stage - Stage-7 depends on stages: Stage-2, Stage-5 , consists of Stage-9, Stage-3 - Stage-9 has a backup stage: Stage-3 - Stage-6 depends on stages: Stage-9 - Stage-3 - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-6, Stage-3 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: s1 + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > '9') (type: boolean) + predicate: (key > '8') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count() keys: key (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Demux Operator + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: 
NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - keys: _col0 (type: bigint) + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -1027,44 +1214,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 - Conditional Operator - - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - 
$INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - - Stage: Stage-6 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1074,7 +1246,7 @@ STAGE PLANS: key expressions: _col2 (type: bigint) sort order: + Map-reduce partition columns: _col2 (type: bigint) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: 
string), _col1 (type: string) TableScan Reduce Output Operator @@ -1083,101 +1255,91 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '8') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Demux Operator + Statistics: Num rows: 84 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: 
bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 42 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: b + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > '8') (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 
_col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE 
+ Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1203,12 +1365,9 @@ having p_name in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-5 depends on stages: Stage-1, Stage-3 , consists of Stage-6, Stage-2 - Stage-6 has a backup stage: Stage-2 - Stage-4 depends on stages: Stage-6 - Stage-2 + Stage-2 depends on stages: Stage-1, Stage-3 Stage-3 is a root stage - Stage-0 depends on stages: Stage-4, Stage-2 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1217,8 +1376,9 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_name, p_mfgr, p_size Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) @@ -1250,45 +1410,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 - Conditional Operator - - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 
_col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Stage: Stage-2 Map Reduce Map Operator Tree: @@ -1306,21 +1427,47 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Demux Operator + Statistics: Num rows: 22 Data size: 4653 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 2326 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Map Reduce @@ -1359,24 +1506,21 @@ STAGE PLANS: window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(MAX)~ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: first_value_window_0 is not null (type: boolean) + Select Operator + expressions: first_value_window_0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: first_value_window_0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/subquery_notexists.q.out b/ql/src/test/results/clientpositive/subquery_notexists.q.out index ede7855..a6a4764 100644 --- a/ql/src/test/results/clientpositive/subquery_notexists.q.out +++ b/ql/src/test/results/clientpositive/subquery_notexists.q.out @@ -19,11 +19,17 @@ where not exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3, Stage-6 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-6 depends on stages: Stage-5 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -31,13 +37,34 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: 
++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -45,38 +72,229 @@ STAGE PLANS: predicate: (value > 'val_2') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: 
string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col3, _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col0 (type: string), _col1 (type: string) - 1 _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string), _col1 (type: string) + 2 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5, _col8 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN 
(false) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value > 'val_2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + 
Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -243,11 +461,49 @@ where not exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3, Stage-6 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-6 depends on stages: Stage-5 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-2 Map Reduce Map Operator Tree: @@ -258,35 +514,77 @@ STAGE PLANS: predicate: (value > 'val_2') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -306,34 +604,162 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col6 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: 
COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col6 is not null) THEN (false) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 
Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value > 'val_2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out index 9349f2d..5839a2b 100644 --- a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out @@ -22,7 +22,13 @@ having not exists POSTHOOK: type: QUERY STAGE 
DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -69,43 +75,265 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + 2 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5, _col8 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + 
Map Reduce + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (value > 'val_12') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) - 1 _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col3, _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value > 'val_12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: 
++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -173,8 +401,13 @@ having not exists POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -227,34 +460,86 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col6 + Statistics: Num rows: 550 Data size: 5843 
Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col6 is not null) THEN (false) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 
(type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -265,35 +550,204 @@ STAGE PLANS: predicate: (value > 'val_12') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: b + 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value > 'val_12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num 
rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) 
+ sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.out index 804f411..9f72cc9 100644 --- a/ql/src/test/results/clientpositive/subquery_notin_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[21][tables = 
[$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- non agg, non corr explain @@ -24,8 +24,9 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 - Stage-3 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2, Stage-5 Stage-4 is a root stage + Stage-5 is a root stage Stage-0 depends on stages: Stage-3 STAGE PLANS: @@ -76,7 +77,8 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -84,8 +86,8 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4906 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 6906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -101,23 +103,15 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 4906 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 6906 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '12') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - 
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator condition map: @@ -125,18 +119,18 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 137 Data size: 2688 Basic stats: COMPLETE Column stats: NONE + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 2688 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 2688 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -147,38 +141,67 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: false (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(key) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE Group By Operator - aggregations: count() + keys: _col0 (type: string), true (type: boolean) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -186,7 +209,6 @@ STAGE PLANS: Processor Tree: ListSink 
-Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- non agg, corr explain select b.p_mfgr, min(p_retailprice) @@ -211,11 +233,20 @@ having b.p_mfgr not in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-3 depends on stages: Stage-2, Stage-12 Stage-4 is a root stage - Stage-5 depends on stages: Stage-4 - Stage-6 is a root stage + Stage-5 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-8 depends on stages: Stage-7 + Stage-9 is a root stage + Stage-10 depends on stages: Stage-9, Stage-14 + Stage-11 depends on stages: Stage-10 + Stage-12 depends on stages: Stage-11, Stage-15 + Stage-13 is a root stage + Stage-14 depends on stages: Stage-13 + Stage-15 is a root stage Stage-0 depends on stages: Stage-3 STAGE PLANS: @@ -260,22 +291,27 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: + key expressions: _col1 (type: double) + sort order: + + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) + value expressions: _col0 (type: string) TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Outer Join0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1898 Basic stats: 
COMPLETE Column stats: NONE + 0 _col1 (type: double) + 1 _col0 (type: double) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -291,32 +327,34 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: bigint) TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) + key expressions: _col3 (type: string), _col1 (type: double) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col3 (type: string), _col1 (type: double) + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: 0 _col0 (type: string), _col1 (type: double) - 1 _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 2087 Basic stats: COMPLETE Column stats: NONE + 1 _col3 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) 
THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -352,41 +390,123 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col2 - _col1) > 600.0) and (_col0 is null or _col1 is null)) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col2 - _col1) > 600.0) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + key expressions: _col1 (type: double) + sort order: + + Map-reduce partition columns: _col1 (type: double) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: double) + 1 _col0 (type: double) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: double), _col0 (type: string) + outputColumnNames: _col3, _col0 + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col3 (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + 
Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: 
mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: double) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -394,7 +514,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -437,45 +579,244 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: double) + sort order: + + Map-reduce partition columns: _col1 (type: double) + Statistics: Num rows: 4 Data size: 484 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: double) + 1 _col0 (type: double) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col3 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -Manufacturer#1 1173.15 -Manufacturer#2 1690.68 -Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, 
$hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: -- agg, non corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double) + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, 
_col3 + Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: double) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 1173.15 +Manufacturer#2 1690.68 +Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- agg, non corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr from part a group by p_mfgr having max(p_retailprice) - min(p_retailprice) > 600 @@ -551,6 +892,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -558,7 +900,7 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -576,13 +918,14 @@ STAGE PLANS: sort order: + Map-reduce 
partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator condition map: @@ -590,10 +933,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col5 Statistics: Num rows: 14 Data size: 2087 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double) @@ -613,52 +956,47 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_retailprice (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col1), min(_col1) - keys: null (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - 
Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(p_retailprice), min(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), min(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double), _col2 (type: double) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) > 600.0) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) > 600.0) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Map Reduce @@ -667,24 +1005,19 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Map Reduce @@ -703,8 +1036,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), true (type: boolean) + sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double) @@ -722,12 +1055,17 @@ STAGE PLANS: expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -735,7 +1073,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross 
product +Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select b.p_mfgr, min(p_retailprice) from part b group by b.p_mfgr @@ -762,3 +1100,675 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 1173.15 Manufacturer#2 1690.68 +PREHOOK: query: --nullability tests +CREATE TABLE t1 (c1 INT, c2 CHAR(100)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: --nullability tests +CREATE TABLE t1 (c1 INT, c2 CHAR(100)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij') +PREHOOK: type: QUERY +PREHOOK: Output: default@t1 +POSTHOOK: query: INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.c1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t1.c2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: CREATE TABLE t2 (c1 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: CREATE TABLE t2 (c1 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: INSERT INTO t2 VALUES (null), (2), (100) +PREHOOK: type: QUERY +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT INTO t2 VALUES (null), (2), (100) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.c1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: 
query: explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2) +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2, Stage-5 + Stage-4 is a root stage + Stage-5 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: c1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 
(type: bigint), _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: c1 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(c1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 
(type: int), _col1 (type: boolean) + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1) +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2, Stage-9 + Stage-6 is a root stage + Stage-4 depends on stages: Stage-6 + Stage-5 depends on stages: Stage-4 + Stage-10 is a root stage + Stage-7 depends on stages: Stage-10 + Stage-8 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-8, Stage-11 + Stage-11 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + 
alias: t1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: c1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 2 Data size: 171 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col0 (type: int) + Statistics: Num rows: 2 Data size: 171 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col3 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col3 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int), _col0 (type: int) + 1 _col3 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col2, _col3, _col6 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((_col2 = 0)) THEN (true) WHEN (_col2 is null) THEN (true) WHEN (_col6 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: c1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 
Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: c1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: c1 (type: int) + 
mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 
(type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join 
Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: c1 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1) +PREHOOK: type: 
QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +NULL +1 +PREHOOK: query: DROP TABLE t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: DROP TABLE t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: DROP TABLE t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: DROP TABLE t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index c7e1f02..7385b4c 100644 --- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -41,60 +41,147 @@ POSTHOOK: query: -- non agg, corr explain select * from src11 where src11.key1 in (select key from src where src11.value1 = value and key > '9') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: src11 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((key1 > '9') and value1 is not null) (type: boolean) + Select Operator + expressions: value1 (type: string) + outputColumnNames: value1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key1 (type: string), 
value1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + keys: value1 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), 
_col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src11 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key1 (type: string), value1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -111,60 +198,147 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from src a where a.key in (select key from src where a.value = value and key > '9') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-2 + 
Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: 
Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -196,8 +370,11 @@ from part b where b.p_size in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 + Stage-6 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -207,15 +384,12 @@ STAGE PLANS: TableScan alias: part2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: p2_mfgr is not null (type: boolean) + Reduce Output Operator + key expressions: p2_mfgr (type: string), p2_size (type: int) + sort order: ++ + Map-reduce partition columns: p2_mfgr (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: p2_mfgr (type: string), p2_size (type: int) - sort order: ++ - Map-reduce partition columns: p2_mfgr (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) @@ -249,18 +423,12 @@ STAGE PLANS: expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By 
Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -272,21 +440,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: 
_col0 (type: string), _col1 (type: int) + aggregations: min(_col1) + keys: _col2 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -294,38 +472,90 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_mfgr is not null and p_size is not null) (type: boolean) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - 
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -340,6 +570,39 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -364,8 
+627,11 @@ from part b where b.p_size in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 + Stage-6 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -375,15 +641,12 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) @@ -417,18 +680,12 @@ STAGE PLANS: expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -440,27 +697,92 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: int) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + 
mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -468,32 +790,29 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_mfgr is not null and p_size is not null) (type: boolean) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 3 Data size: 370 Basic stats: 
COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -508,6 +827,39 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -535,11 +887,46 @@ where b.key in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 + 
Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-2 Map Reduce Map Operator Tree: @@ -547,29 +934,65 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) + predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash + Select Operator + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 
_col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: 
string), _col1 (type: string) - mode: hash + mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -583,36 +1006,33 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string), 
_col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -640,8 +1060,11 @@ having count(*) in (select count(*) from src where src.key > '9' and src.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-6 is a root stage + Stage-3 depends on stages: Stage-6 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -651,25 +1074,22 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + Group By Operator + aggregations: count() + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key 
expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -677,19 +1097,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -703,17 +1120,17 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan Reduce Output Operator - key 
expressions: _col0 (type: string), _col1 (type: bigint) + key expressions: _col1 (type: string), _col0 (type: bigint) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: bigint) - 1 _col0 (type: string), _col1 (type: bigint) + 1 _col1 (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -724,60 +1141,157 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value 
(type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: 
count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), 
_col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -785,7 +1299,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- non agg, corr explain select p_mfgr, b.p_name, p_size @@ -808,10 +1321,17 @@ where b.p_name not in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-5 is a root stage + Stage-4 depends on stages: Stage-3, Stage-6 + Stage-5 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-1, Stage-10 + Stage-6 is a root stage + Stage-7 is a root stage + Stage-8 depends on stages: Stage-7, Stage-11 + Stage-9 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-9, Stage-12 + Stage-11 is a root stage + Stage-12 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -855,41 +1375,54 @@ STAGE PLANS: isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: 
NONE Filter Operator - predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter 
Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -897,6 +1430,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Map 
Reduce Map Operator Tree: @@ -908,22 +1465,27 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + value expressions: _col0 (type: string), _col2 (type: int) TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Outer Join0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3381 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -939,39 +1501,73 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3381 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: bigint), _col5 (type: bigint) TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col3 
(type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col3 (type: string), _col1 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 28 Data size: 3719 Basic stats: COMPLETE Column stats: NONE + 1 _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col4 is null (type: boolean) - Statistics: Num rows: 14 Data size: 1859 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1859 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 14 Data size: 1859 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-5 + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -1014,7 +1610,7 @@ STAGE PLANS: predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1024,6 +1620,167 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-8 + 
Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 865 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string) + outputColumnNames: p_mfgr + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition 
columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string) + outputColumnNames: p_name + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index 3468657..af42e41 100644 --- 
a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +Warning: Map Join MAPJOIN[35][bigTable=?] in task 'Stage-8:MAPRED' is a cross product PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. explain select * @@ -17,8 +17,14 @@ order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-4 + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-5, Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-5 is a root stage Stage-0 depends on stages: Stage-3 STAGE PLANS: @@ -28,47 +34,38 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(key) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-8 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src Fetch Operator limit: -1 - $hdt$_2:src - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:src TableScan @@ -82,20 +79,8 @@ STAGE PLANS: keys: 0 1 - $hdt$_2:src - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Stage: Stage-3 + Stage: 
Stage-8 Map Reduce Map Operator Tree: TableScan @@ -105,50 +90,164 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 550 Data size: 10793 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 
550 Data size: 15193 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + 
Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +Warning: Map Join MAPJOIN[35][bigTable=?] 
in task 'Stage-8:MAPRED' is a cross product PREHOOK: query: select * from src where not key in diff --git a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out index 160b088..b2b9a3b 100644 --- a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out @@ -23,19 +23,20 @@ where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-7 depends on stages: Stage-4, Stage-8 , consists of Stage-9, Stage-10, Stage-2 - Stage-9 has a backup stage: Stage-2 - Stage-5 depends on stages: Stage-9 - Stage-10 has a backup stage: Stage-2 + Stage-1 is a root stage + Stage-12 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-12 + Stage-8 depends on stages: Stage-5, Stage-9 , consists of Stage-10, Stage-11, Stage-3 + Stage-10 has a backup stage: Stage-3 Stage-6 depends on stages: Stage-10 - Stage-2 - Stage-11 is a root stage - Stage-8 depends on stages: Stage-11 - Stage-0 depends on stages: Stage-5, Stage-6, Stage-2 + Stage-11 has a backup stage: Stage-3 + Stage-7 depends on stages: Stage-11 + Stage-3 + Stage-5 is a root stage + Stage-0 depends on stages: Stage-6, Stage-7, Stage-3 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -67,10 +68,54 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 - Conditional Operator + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:li + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:li + TableScan + alias: li + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((l_linenumber = 1) and l_partkey is 
not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -84,7 +129,7 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -94,10 +139,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col0, _col3 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -110,7 +155,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-10 + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -124,7 +169,7 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) - 
Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -134,10 +179,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col0, _col3 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -150,7 +195,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -159,13 +204,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + value expressions: _col0 (type: int), _col3 (type: int) TableScan Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -173,10 +218,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col0, _col3 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -187,63 +232,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-11 - Map 
Reduce Local Work - Alias -> Map Local Tables: - $hdt$_2:lineitem - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_2:lineitem + Stage: Stage-5 + Map Reduce + Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: _col0 + outputColumnNames: l_orderkey Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: int) + keys: l_orderkey (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - alias: li - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -290,19 +313,24 @@ where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-7 depends on stages: Stage-4, Stage-8 , consists of Stage-9, Stage-10, Stage-2 - Stage-9 has a backup stage: Stage-2 - Stage-5 depends on stages: Stage-9 - Stage-10 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-10 - Stage-2 - Stage-11 is a root stage - Stage-8 depends on stages: Stage-11 - Stage-0 depends on stages: Stage-5, Stage-6, Stage-2 + Stage-1 is a root stage + Stage-19 depends on stages: Stage-1 + Stage-14 depends on stages: Stage-19 + Stage-13 depends on stages: Stage-6, Stage-14 , consists of Stage-17, Stage-18, Stage-3 + Stage-17 has a backup stage: Stage-3 + Stage-11 depends on stages: Stage-17 + Stage-18 has a backup stage: Stage-3 + Stage-12 depends on stages: Stage-18 + Stage-3 + Stage-7 is a root stage + Stage-21 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-21 + Stage-20 depends on stages: Stage-9 + Stage-6 depends on stages: 
Stage-20 + Stage-0 depends on stages: Stage-11, Stage-12, Stage-3 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -334,10 +362,54 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-19 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:li + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:li + TableScan + alias: li + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-13 Conditional Operator - Stage: Stage-9 + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -348,10 +420,10 @@ STAGE PLANS: TableScan HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: int), 
_col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) - Stage: Stage-5 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -359,12 +431,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -377,7 +449,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-10 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -388,10 +460,10 @@ STAGE PLANS: TableScan HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) - Stage: Stage-6 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -399,12 +471,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -417,33 +489,33 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + 
- Map-reduce partition columns: _col1 (type: int) + key expressions: _col1 (type: int), _col4 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col4 (type: int) Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + value expressions: _col0 (type: int), _col3 (type: int) TableScan Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 30 Data size: 3629 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -454,63 +526,157 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-11 + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-21 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:lineitem + $hdt$_2:$hdt$_3:$hdt$_4:li Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:lineitem + $hdt$_2:$hdt$_3:$hdt$_4:li TableScan - alias: lineitem + alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int), 1 (type: int) + expressions: l_partkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int), 1 (type: int) - 1 
_col0 (type: int), _col1 (type: int) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) - Stage: Stage-8 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan - alias: li + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 6599 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-20 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:$hdt$_2:lineitem + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_2:$hdt$_2:lineitem + TableScan + alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey 
(type: int), l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: l_orderkey (type: int), l_linenumber (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 + HashTable Sink Operator keys: - 0 _col0 (type: int), 1 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col1 (type: int) + 1 _col0 (type: int) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3629 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator