diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 8aee7f5..93584fd 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -406,8 +406,12 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ minillap.query.files=acid_bucket_pruning.q,\ bucket5.q,\ bucket6.q,\ + except_distinct.q,\ explainuser_2.q,\ empty_dir_in_table.q,\ + intersect_all.q,\ + intersect_distinct.q,\ + intersect_merge.q,\ llap_udf.q,\ llapdecider.q,\ reduce_deduplicate.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index f308832..7ed3907 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -27,6 +27,7 @@ import org.antlr.runtime.tree.Tree; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.ASTNodeOrigin; +import org.apache.hadoop.hive.ql.parse.SemanticException; /** * List of all error messages. @@ -450,6 +451,7 @@ ACID_NOT_ENOUGH_HISTORY(10327, "Not enough history available for ({0},{1}). " + "Oldest available base: {2}", true), INVALID_COLUMN_NAME(10328, "Invalid column name"), + UNSUPPORTED_SET_OPERATOR(10329, "Unsupported set operator"), REPLACE_VIEW_WITH_MATERIALIZED(10400, "Attempt to replace view {0} with materialized view", true), REPLACE_MATERIALIZED_WITH_VIEW(10401, "Attempt to replace materialized view {0} with view", true), UPDATE_DELETE_VIEW(10402, "You cannot update or delete records in a view"), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index c527e58..6ccd879 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -27,9 +27,11 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.plan.RelOptUtil.InputFinder; import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.RelFactories.ProjectFactory; import org.apache.calcite.rel.core.Sort; @@ -51,21 +53,30 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitor; import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import org.apache.calcite.util.Util; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan; import 
org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.ParseUtils;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -942,6 +953,68 @@ public static ExprNodeDesc getExprNode(Integer inputRefIndx, RelNode inputRel,
     return fieldNames;
   }
 
+  public static AggregateCall createSingleArgAggCall(String funcName, RelOptCluster cluster,
+      PrimitiveTypeInfo typeInfo, Integer pos, RelDataType aggFnRetType) {
+    ImmutableList.Builder<RelDataType> aggArgRelDTBldr = new ImmutableList.Builder<RelDataType>();
+    aggArgRelDTBldr.add(TypeConverter.convert(typeInfo, cluster.getTypeFactory()));
+    SqlAggFunction aggFunction = SqlFunctionConverter.getCalciteAggFn(funcName, false,
+        aggArgRelDTBldr.build(), aggFnRetType);
+    List<Integer> argList = new ArrayList<Integer>();
+    argList.add(pos);
+    return new AggregateCall(aggFunction, false, argList, aggFnRetType, null);
+  }
+
+  public static HiveTableFunctionScan createUDTFForSetOp(RelOptCluster cluster, RelNode input)
+      throws SemanticException {
+    RelTraitSet traitSet = TraitsUtil.getDefaultTraitSet(cluster);
+
+    List<RexNode> originalInputRefs = Lists.transform(input.getRowType().getFieldList(),
+        new Function<RelDataTypeField, RexNode>() {
+          @Override
+          public RexNode apply(RelDataTypeField input) {
+            return new RexInputRef(input.getIndex(), input.getType());
+          }
+        });
+    ImmutableList.Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType> builder();
+    for (int i = 0; i < originalInputRefs.size(); i++) {
+      argTypeBldr.add(originalInputRefs.get(i).getType());
+    }
+
+    RelDataType retType = input.getRowType();
+
+    String funcName = "replicate_rows";
+    FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
+    SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(funcName, fi.getGenericUDTF(),
+        argTypeBldr.build(), retType);
+
+    // Hive UDTF only has a single input
+    List<RelNode> list = new ArrayList<>();
+    list.add(input);
+
+    RexNode rexNode = cluster.getRexBuilder().makeCall(calciteOp, originalInputRefs);
+
+    return HiveTableFunctionScan.create(cluster, traitSet, list, rexNode, null, retType, null);
+  }
+
+  // this will create a project which projects out the columns at the given positions
+  public static HiveProject createProjectWithoutColumn(RelNode input, Set<Integer> positions)
+      throws CalciteSemanticException {
+    List<RexNode> originalInputRefs = Lists.transform(input.getRowType().getFieldList(),
+        new Function<RelDataTypeField, RexNode>() {
+          @Override
+          public RexNode apply(RelDataTypeField input) {
+            return new RexInputRef(input.getIndex(), input.getType());
+          }
+        });
+    List<RexNode> copyInputRefs = new ArrayList<>();
+    for (int i = 0; i < originalInputRefs.size(); i++) {
+      if (!positions.contains(i)) {
+        copyInputRefs.add(originalInputRefs.get(i));
+      }
+    }
+    return HiveProject.create(input, copyInputRefs, null);
+  }
+
   /**
    * Walks over an expression and determines whether it is constant.
   */
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
index cf93ed8..a123f63 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
@@ -45,7 +45,9 @@ import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
@@ -199,10 +201,18 @@ public RelNode createAggregate(RelNode child, boolean indicator,
   private static class HiveSetOpFactoryImpl implements SetOpFactory {
     @Override
     public RelNode createSetOp(SqlKind kind, List<RelNode> inputs, boolean all) {
-      if (kind != SqlKind.UNION) {
-        throw new IllegalStateException("Expected to get Set operator of type Union. Found : " + kind);
+      if (kind == SqlKind.UNION) {
+        return new HiveUnion(inputs.get(0).getCluster(), inputs.get(0).getTraitSet(), inputs);
+      } else if (kind == SqlKind.INTERSECT) {
+        return new HiveIntersect(inputs.get(0).getCluster(), inputs.get(0).getTraitSet(), inputs,
+            all);
+      } else if (kind == SqlKind.EXCEPT) {
+        return new HiveExcept(inputs.get(0).getCluster(), inputs.get(0).getTraitSet(), inputs,
+            all);
+      } else {
+        throw new IllegalStateException("Expected to get set operator of type Union, Intersect or Except(Minus). Found : "
+            + kind);
       }
-      return new HiveUnion(inputs.get(0).getCluster(), inputs.get(0).getTraitSet(), inputs);
     }
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExcept.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExcept.java
new file mode 100644
index 0000000..a4afc77
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExcept.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Minus;
+import org.apache.calcite.rel.core.SetOp;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode.Implementor;
+
+public class HiveExcept extends Minus {
+
+  public HiveExcept(RelOptCluster cluster, RelTraitSet traits, List<RelNode> inputs, boolean all) {
+    super(cluster, traits, inputs, all);
+  }
+
+  @Override
+  public SetOp copy(RelTraitSet traitSet, List<RelNode> inputs, boolean all) {
+    return new HiveExcept(this.getCluster(), traitSet, inputs, all);
+  }
+
+  public void implement(Implementor implementor) {
+  }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIntersect.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIntersect.java
new file mode 100644
index 0000000..19e1e02
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIntersect.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Intersect;
+import org.apache.calcite.rel.core.SetOp;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode.Implementor;
+
+public class HiveIntersect extends Intersect {
+
+  public HiveIntersect(RelOptCluster cluster, RelTraitSet traits, List<RelNode> inputs, boolean all) {
+    super(cluster, traits, inputs, all);
+  }
+
+  @Override
+  public SetOp copy(RelTraitSet traitSet, List<RelNode> inputs, boolean all) {
+    return new HiveIntersect(this.getCluster(), traitSet, inputs, all);
+  }
+
+  public void implement(Implementor implementor) {
+  }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExceptRewriteRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExceptRewriteRule.java
new file mode 100644
index 0000000..b63ea02
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExceptRewriteRule.java
@@ -0,0 +1,375 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Util;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableList.Builder;
+import com.google.common.collect.Lists;
+
+/**
+ * Planner rule that rewrites
+ * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept}.
+ * Note, we only have 2 branches because of EXCEPT's semantics.
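+ * (EXCEPT is a binary, left-associative operator, so a chain such as R1 EXCEPT R2 EXCEPT R3
+ * is parsed as nested two-branch operators.)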
+ * R1 Except(all) R2
+ * R1 introduces VCol '2', R2 introduces VCol '1'
+ * R3 = GB(R1 on all keys + VCol + count(VCol) as c) union all GB(R2 on all keys + VCol + count(VCol) as c)
+ * R4 = GB(R3 on all keys + sum(c) as a + sum(VCol*c) as b). We
+ * have m+n=a and 2m+n=b, where m is the #rows of a tuple in R1 and n is the #rows in R2; then
+ * m=b-a, n=2a-b, m-n=2b-3a.
+ * If it is EXCEPT (DISTINCT),
+ * then R5 = Fil (b-a>0 && 2a-b=0) and R6 = select only keys from R5;
+ * else R5 = Fil (2b-3a>0) and R6 = UDTF (R5), which will explode the tuples based on 2b-3a.
+ * Note that NULLs are handled the same as other values. Please refer to the test cases.
+ */
+public class HiveExceptRewriteRule extends RelOptRule {
+
+  public static final HiveExceptRewriteRule INSTANCE = new HiveExceptRewriteRule();
+
+  protected static final Logger LOG = LoggerFactory.getLogger(HiveExceptRewriteRule.class);
+
+
+  // ~ Constructors -----------------------------------------------------------
+
+  private HiveExceptRewriteRule() {
+    super(operand(HiveExcept.class, any()));
+  }
+
+  // ~ Methods ----------------------------------------------------------------
+
+  public void onMatch(RelOptRuleCall call) {
+    final HiveExcept hiveExcept = call.rel(0);
+
+    final RelOptCluster cluster = hiveExcept.getCluster();
+    final RexBuilder rexBuilder = cluster.getRexBuilder();
+    Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
+
+    // 1st level GB: create a GB(R1 on all keys + VCol + count(VCol) as c) for each
+    // branch
+    try {
+      bldr.add(createFirstGB(hiveExcept.getInputs().get(0), true, cluster, rexBuilder));
+      bldr.add(createFirstGB(hiveExcept.getInputs().get(1), false, cluster, rexBuilder));
+    } catch (CalciteSemanticException e) {
+      LOG.debug(e.toString());
+      throw new RuntimeException(e);
+    }
+
+    // create a union above all the branches
+    // the schema of union looks like this
+    // all keys + VCol + c
+    HiveRelNode union = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
+
+    // 2nd level GB: create a GB (all keys + sum(c) as a + sum(VCol*c) as b)
+    // over the union
+    final List<RexNode> gbChildProjLst = Lists.newArrayList();
+    final List<Integer> groupSetPositions = Lists.newArrayList();
+    int unionColumnSize = union.getRowType().getFieldList().size();
+    for (int cInd = 0; cInd < unionColumnSize; cInd++) {
+      gbChildProjLst.add(rexBuilder.makeInputRef(union, cInd));
+      // the last 2 columns are VCol and c
+      if (cInd < unionColumnSize - 2) {
+        groupSetPositions.add(cInd);
+      }
+    }
+
+    try {
+      gbChildProjLst.add(multiply(rexBuilder.makeInputRef(union, unionColumnSize - 2),
+          rexBuilder.makeInputRef(union, unionColumnSize - 1), cluster, rexBuilder));
+    } catch (CalciteSemanticException e) {
+      LOG.debug(e.toString());
+      throw new RuntimeException(e);
+    }
+
+    RelNode gbInputRel = null;
+    try {
+      // Here we create a project for the following reasons:
+      // (1) GBy only accepts args as positions of the input, however, we need to sum on VCol*c
+      // (2) This can better reuse the function createSingleArgAggCall.
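+      // Illustrative example of the counting scheme (numbers assumed for this comment,
+      // not taken from the test cases): a tuple with m=2 copies in R1 and n=1 copy in R2
+      // gives a = m+n = 3 and b = 2m+n = 5, so EXCEPT ALL keeps 2b-3a = 1 copy, while
+      // EXCEPT DISTINCT drops the tuple since 2a-b = 1 != 0 (it still occurs in R2).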
+      gbInputRel = HiveProject.create(union, gbChildProjLst, null);
+    } catch (CalciteSemanticException e) {
+      LOG.debug(e.toString());
+      throw new RuntimeException(e);
+    }
+
+    // gbInputRel's schema is like this
+    // all keys + VCol + c + VCol*c
+    List<AggregateCall> aggregateCalls = Lists.newArrayList();
+    RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo,
+        cluster.getTypeFactory());
+
+    // sum(c)
+    AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("sum", cluster,
+        TypeInfoFactory.longTypeInfo, unionColumnSize - 1, aggFnRetType);
+    aggregateCalls.add(aggregateCall);
+
+    // sum(VCol*c)
+    aggregateCall = HiveCalciteUtil.createSingleArgAggCall("sum", cluster,
+        TypeInfoFactory.longTypeInfo, unionColumnSize, aggFnRetType);
+    aggregateCalls.add(aggregateCall);
+
+    final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
+    HiveRelNode aggregateRel = new HiveAggregate(cluster,
+        cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, false, groupSet, null,
+        aggregateCalls);
+
+    // the schema after GB is like this
+    // all keys + sum(c) as a + sum(VCol*c) as b
+    // the column size is the same as unionColumnSize;
+    // (1) for except distinct add a filter (b-a>0 && 2a-b=0),
+    // i.e., a > 0 && 2a = b,
+    // then add the project
+    // (2) for except all add a project to change it to
+    // (2b-3a) + all keys
+    // then add the UDTF
+
+    if (!hiveExcept.all) {
+      RelNode filterRel = null;
+      try {
+        filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+            aggregateRel, makeFilterExprForExceptDistinct(aggregateRel, unionColumnSize, cluster,
+                rexBuilder));
+      } catch (CalciteSemanticException e) {
+        LOG.debug(e.toString());
+        throw new RuntimeException(e);
+      }
+
+      // finally add a project to project out the last 2 columns
+      Set<Integer> projectOutColumnPositions = new HashSet<>();
+      projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 2);
+      projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 1);
+      try {
+        call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(filterRel,
+            projectOutColumnPositions));
+      } catch (CalciteSemanticException e) {
+        LOG.debug(e.toString());
+        throw new RuntimeException(e);
+      }
+    } else {
+      List<RexNode> originalInputRefs = Lists.transform(aggregateRel.getRowType().getFieldList(),
+          new Function<RelDataTypeField, RexNode>() {
+            @Override
+            public RexNode apply(RelDataTypeField input) {
+              return new RexInputRef(input.getIndex(), input.getType());
+            }
+          });
+
+      List<RexNode> copyInputRefs = new ArrayList<>();
+      try {
+        copyInputRefs.add(makeExprForExceptAll(aggregateRel, unionColumnSize, cluster, rexBuilder));
+      } catch (CalciteSemanticException e) {
+        LOG.debug(e.toString());
+        throw new RuntimeException(e);
+      }
+      for (int i = 0; i < originalInputRefs.size() - 2; i++) {
+        copyInputRefs.add(originalInputRefs.get(i));
+      }
+      RelNode srcRel = null;
+      try {
+        srcRel = HiveProject.create(aggregateRel, copyInputRefs, null);
+        HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
+        // finally add a project to project out the 1st column
+        Set<Integer> projectOutColumnPositions = new HashSet<>();
+        projectOutColumnPositions.add(0);
+        call.transformTo(HiveCalciteUtil
+            .createProjectWithoutColumn(udtf, projectOutColumnPositions));
+      } catch (SemanticException e) {
+        LOG.debug(e.toString());
+        throw new RuntimeException(e);
+      }
+    }
+  }
+
+  private RelNode createFirstGB(RelNode input, boolean left, RelOptCluster cluster,
+      RexBuilder rexBuilder) throws CalciteSemanticException {
+    final List<RexNode> gbChildProjLst = Lists.newArrayList();
+    final List<Integer> groupSetPositions = Lists.newArrayList();
+    for (int cInd = 0; cInd < input.getRowType().getFieldList().size(); cInd++) {
+      gbChildProjLst.add(rexBuilder.makeInputRef(input, cInd));
+      groupSetPositions.add(cInd);
+    }
+    if (left) {
+      gbChildProjLst.add(rexBuilder.makeBigintLiteral(new BigDecimal(2)));
+    } else {
+      gbChildProjLst.add(rexBuilder.makeBigintLiteral(new BigDecimal(1)));
+    }
+
+    // also add the last VCol
+    groupSetPositions.add(input.getRowType().getFieldList().size());
+
+    // create the project before GB
+    RelNode gbInputRel = HiveProject.create(input, gbChildProjLst, null);
+
+    // groupSetPosition includes all the positions
+    final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
+
+    List<AggregateCall> aggregateCalls = Lists.newArrayList();
+    RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo,
+        cluster.getTypeFactory());
+
+    AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster,
+        TypeInfoFactory.longTypeInfo, input.getRowType().getFieldList().size(), aggFnRetType);
+    aggregateCalls.add(aggregateCall);
+    return new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel,
+        false, groupSet, null, aggregateCalls);
+  }
+
+  private RexNode multiply(RexNode r1, RexNode r2, RelOptCluster cluster, RexBuilder rexBuilder)
+      throws CalciteSemanticException {
+    List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
+    childRexNodeLst.add(r1);
+    childRexNodeLst.add(r2);
+    ImmutableList.Builder<RelDataType> calciteArgTypesBldr = new ImmutableList.Builder<RelDataType>();
+    calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo,
+        cluster.getTypeFactory()));
+    calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo,
+        cluster.getTypeFactory()));
+    return rexBuilder.makeCall(
+        SqlFunctionConverter.getCalciteFn("*", calciteArgTypesBldr.build(),
+            TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), true),
+        childRexNodeLst);
+  }
+
+  private RexNode makeFilterExprForExceptDistinct(HiveRelNode input, int columnSize,
+      RelOptCluster cluster, RexBuilder rexBuilder) throws CalciteSemanticException {
+    List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
+    RexInputRef a = rexBuilder.makeInputRef(input, columnSize - 2);
+    RexLiteral zero = rexBuilder.makeBigintLiteral(new BigDecimal(0));
+    childRexNodeLst.add(a);
+    childRexNodeLst.add(zero);
+    ImmutableList.Builder<RelDataType> calciteArgTypesBldr = new ImmutableList.Builder<RelDataType>();
+    calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo,
+        cluster.getTypeFactory()));
+    calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo,
+        cluster.getTypeFactory()));
+    // a>0
+    RexNode aMorethanZero = rexBuilder.makeCall(
+        SqlFunctionConverter.getCalciteFn(">", calciteArgTypesBldr.build(),
+            TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), false),
+        childRexNodeLst);
+    childRexNodeLst = new ArrayList<RexNode>();
+    RexLiteral two = rexBuilder.makeBigintLiteral(new BigDecimal(2));
+    childRexNodeLst.add(a);
+    childRexNodeLst.add(two);
+    // 2*a
+    RexNode twoa = rexBuilder.makeCall(
+        SqlFunctionConverter.getCalciteFn("*", calciteArgTypesBldr.build(),
+            TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), false),
+        childRexNodeLst);
+    childRexNodeLst = new ArrayList<RexNode>();
+    RexInputRef b = rexBuilder.makeInputRef(input, columnSize - 1);
+    childRexNodeLst.add(twoa);
+    childRexNodeLst.add(b);
+    // 2a=b
+    RexNode twoaEqualTob = rexBuilder.makeCall(
SqlFunctionConverter.getCalciteFn("=", calciteArgTypesBldr.build(), + TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), false), + childRexNodeLst); + childRexNodeLst = new ArrayList(); + childRexNodeLst.add(aMorethanZero); + childRexNodeLst.add(twoaEqualTob); + // a>0 && 2a=b + return rexBuilder.makeCall( + SqlFunctionConverter.getCalciteFn("and", calciteArgTypesBldr.build(), + TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), false), + childRexNodeLst); + } + + private RexNode makeExprForExceptAll(HiveRelNode input, int columnSize, RelOptCluster cluster, + RexBuilder rexBuilder) throws CalciteSemanticException { + List childRexNodeLst = new ArrayList(); + ImmutableList.Builder calciteArgTypesBldr = new ImmutableList.Builder(); + calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, + cluster.getTypeFactory())); + calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, + cluster.getTypeFactory())); + RexInputRef a = rexBuilder.makeInputRef(input, columnSize - 2); + RexLiteral three = rexBuilder.makeBigintLiteral(new BigDecimal(3)); + childRexNodeLst.add(three); + childRexNodeLst.add(a); + RexNode threea = rexBuilder.makeCall( + SqlFunctionConverter.getCalciteFn("*", calciteArgTypesBldr.build(), + TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), false), + childRexNodeLst); + + RexLiteral two = rexBuilder.makeBigintLiteral(new BigDecimal(2)); + RexInputRef b = rexBuilder.makeInputRef(input, columnSize - 1); + + // 2*b + childRexNodeLst = new ArrayList(); + childRexNodeLst.add(two); + childRexNodeLst.add(b); + RexNode twob = rexBuilder.makeCall( + SqlFunctionConverter.getCalciteFn("*", calciteArgTypesBldr.build(), + TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), false), + childRexNodeLst); + + // 2b-3a + childRexNodeLst = new ArrayList(); + childRexNodeLst.add(twob); + childRexNodeLst.add(threea); + return rexBuilder.makeCall( + SqlFunctionConverter.getCalciteFn("-", calciteArgTypesBldr.build(), + TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), false), + childRexNodeLst); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveIntersectMergeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveIntersectMergeRule.java new file mode 100644 index 0000000..ba422af --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveIntersectMergeRule.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
+import org.apache.calcite.util.Util;
+
+/**
+ * Planner rule that merges multiple HiveIntersect operators into one.
+ * Before the rule, it is
+ * intersect-branch1
+ *    |-----intersect-branch2
+ *    |-----branch3
+ * After the rule, it becomes
+ * intersect-branch1
+ *    |-----branch2
+ *    |-----branch3
+ * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect}
+ */
+public class HiveIntersectMergeRule extends RelOptRule {
+
+  public static final HiveIntersectMergeRule INSTANCE = new HiveIntersectMergeRule();
+
+  // ~ Constructors -----------------------------------------------------------
+
+  private HiveIntersectMergeRule() {
+    super(
+        operand(HiveIntersect.class, operand(RelNode.class, any()), operand(RelNode.class, any())));
+  }
+
+  // ~ Methods ----------------------------------------------------------------
+
+  public void onMatch(RelOptRuleCall call) {
+    final HiveIntersect topHiveIntersect = call.rel(0);
+
+    final HiveIntersect bottomHiveIntersect;
+    if (call.rel(2) instanceof HiveIntersect) {
+      bottomHiveIntersect = call.rel(2);
+    } else if (call.rel(1) instanceof HiveIntersect) {
+      bottomHiveIntersect = call.rel(1);
+    } else {
+      return;
+    }
+
+    boolean all = topHiveIntersect.all;
+    // top is distinct, we can always merge whether bottom is distinct or not
+    // top is all, we can only merge if bottom is also all
+    // that is to say, we should bail out if top is all and bottom is distinct
+    if (all && !bottomHiveIntersect.all) {
+      return;
+    }
+
+    List<RelNode> inputs = new ArrayList<>();
+    if (call.rel(2) instanceof HiveIntersect) {
+      inputs.add(topHiveIntersect.getInput(0));
+      inputs.addAll(bottomHiveIntersect.getInputs());
+    } else {
+      inputs.addAll(bottomHiveIntersect.getInputs());
+      inputs.addAll(Util.skip(topHiveIntersect.getInputs()));
+    }
+
+    HiveIntersect newIntersect = (HiveIntersect) topHiveIntersect.copy(
+        topHiveIntersect.getTraitSet(), inputs, all);
+    call.transformTo(newIntersect);
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveIntersectRewriteRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveIntersectRewriteRule.java
new file mode 100644
index 0000000..16e4b99
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveIntersectRewriteRule.java
@@ -0,0 +1,250 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.Intersect;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Util;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableList.Builder;
+import com.google.common.collect.Lists;
+
+/**
+ * Planner rule that rewrites
+ * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect}.
+ * Rewrite: (GB-Union All-GB)-GB-UDTF (on all attributes)
+ *
+ * Example: R1 Intersect All R2
+ * R3 = GB(R1 on all attributes + count(1) as c) union all GB(R2 on all attributes + count(1) as c)
+ * R4 = GB(R3 on all attributes + count(c) as cnt + min(c) as m)
+ * R5 = Fil ( cnt == #branch )
+ *
+ * If it is intersect all, then
+ * R6 = UDTF (R5) which will explode the tuples based on min(c).
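+ * (e.g., a tuple occurring 3 times in one branch and 2 times in the other has min(c) = 2,
+ * so it is emitted twice.)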
+ * R7 = Proj(R6 on all attributes)
+ * Else
+ * R6 = Proj(R5 on all attributes)
+ */
+public class HiveIntersectRewriteRule extends RelOptRule {
+
+  public static final HiveIntersectRewriteRule INSTANCE = new HiveIntersectRewriteRule();
+
+  protected static final Logger LOG = LoggerFactory.getLogger(HiveIntersectRewriteRule.class);
+
+
+  // ~ Constructors -----------------------------------------------------------
+
+  private HiveIntersectRewriteRule() {
+    super(operand(HiveIntersect.class, any()));
+  }
+
+  // ~ Methods ----------------------------------------------------------------
+
+  public void onMatch(RelOptRuleCall call) {
+    final HiveIntersect hiveIntersect = call.rel(0);
+
+    final RelOptCluster cluster = hiveIntersect.getCluster();
+    final RexBuilder rexBuilder = cluster.getRexBuilder();
+    int numOfBranch = hiveIntersect.getInputs().size();
+    Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
+
+    // 1st level GB: create a GB (col0, col1, count(1) as c) for each branch
+    for (int index = 0; index < numOfBranch; index++) {
+      RelNode input = hiveIntersect.getInputs().get(index);
+      final List<RexNode> gbChildProjLst = Lists.newArrayList();
+      final List<Integer> groupSetPositions = Lists.newArrayList();
+      for (int cInd = 0; cInd < input.getRowType().getFieldList().size(); cInd++) {
+        gbChildProjLst.add(rexBuilder.makeInputRef(input, cInd));
+        groupSetPositions.add(cInd);
+      }
+      gbChildProjLst.add(rexBuilder.makeBigintLiteral(new BigDecimal(1)));
+
+      // create the project before GB because we need a new project with extra column '1'.
+      RelNode gbInputRel = null;
+      try {
+        gbInputRel = HiveProject.create(input, gbChildProjLst, null);
+      } catch (CalciteSemanticException e) {
+        LOG.debug(e.toString());
+        throw new RuntimeException(e);
+      }
+
+      // groupSetPosition includes all the positions
+      final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
+
+      List<AggregateCall> aggregateCalls = Lists.newArrayList();
+      RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo,
+          cluster.getTypeFactory());
+
+      // count(1), 1's position is input.getRowType().getFieldList().size()
+      AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster,
+          TypeInfoFactory.longTypeInfo, input.getRowType().getFieldList().size(), aggFnRetType);
+      aggregateCalls.add(aggregateCall);
+
+      HiveRelNode aggregateRel = new HiveAggregate(cluster,
+          cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, false, groupSet, null,
+          aggregateCalls);
+      bldr.add(aggregateRel);
+    }
+
+    // create a union above all the branches
+    HiveRelNode union = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
+
+    // 2nd level GB: create a GB (col0, col1, count(c)) on top of the union
+    final List<Integer> groupSetPositions = Lists.newArrayList();
+    // the index of c
+    int cInd = union.getRowType().getFieldList().size() - 1;
+    for (int index = 0; index < union.getRowType().getFieldList().size(); index++) {
+      if (index != cInd) {
+        groupSetPositions.add(index);
+      }
+    }
+
+    List<AggregateCall> aggregateCalls = Lists.newArrayList();
+    RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo,
+        cluster.getTypeFactory());
+
+    AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster,
+        TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
+    aggregateCalls.add(aggregateCall);
+    if (hiveIntersect.all) {
+      aggregateCall = HiveCalciteUtil.createSingleArgAggCall("min", cluster,
+          TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
+      aggregateCalls.add(aggregateCall);
+    }
+
+    final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
+    HiveRelNode aggregateRel = new HiveAggregate(cluster,
+        cluster.traitSetOf(HiveRelNode.CONVENTION), union, false, groupSet, null, aggregateCalls);
+
+    // add a filter count(c) = #branches
+    int countInd = cInd;
+    List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
+    RexInputRef ref = rexBuilder.makeInputRef(aggregateRel, countInd);
+    RexLiteral literal = rexBuilder.makeBigintLiteral(new BigDecimal(numOfBranch));
+    childRexNodeLst.add(ref);
+    childRexNodeLst.add(literal);
+    ImmutableList.Builder<RelDataType> calciteArgTypesBldr = new ImmutableList.Builder<RelDataType>();
+    calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo,
+        cluster.getTypeFactory()));
+    calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo,
+        cluster.getTypeFactory()));
+    RexNode factoredFilterExpr = null;
+    try {
+      factoredFilterExpr = rexBuilder
+          .makeCall(
+              SqlFunctionConverter.getCalciteFn("=", calciteArgTypesBldr.build(),
+                  TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()),
+                  true), childRexNodeLst);
+    } catch (CalciteSemanticException e) {
+      LOG.debug(e.toString());
+      throw new RuntimeException(e);
+    }
+
+    RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+        aggregateRel, factoredFilterExpr);
+
+    if (!hiveIntersect.all) {
+      // the schema for intersect distinct is like this
+      // R3 on all attributes + count(c) as cnt
+      // finally add a project to project out the last column
+      Set<Integer> projectOutColumnPositions = new HashSet<>();
+      projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 1);
+      try {
+        call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(filterRel, projectOutColumnPositions));
+      } catch (CalciteSemanticException e) {
+        LOG.debug(e.toString());
+        throw new RuntimeException(e);
+      }
+    } else {
+      // the schema for intersect all is like this
+      // R3 + count(c) as cnt + min(c) as m
+      // we create an input project for the udtf whose schema is like this
+      // min(c) as m + R3
+      List<RexNode> originalInputRefs = Lists.transform(filterRel.getRowType().getFieldList(),
+          new Function<RelDataTypeField, RexNode>() {
+            @Override
+            public RexNode apply(RelDataTypeField input) {
+              return new RexInputRef(input.getIndex(), input.getType());
+            }
+          });
+
+      List<RexNode> copyInputRefs = new ArrayList<>();
+      copyInputRefs.add(originalInputRefs.get(originalInputRefs.size() - 1));
+      for (int i = 0; i < originalInputRefs.size() - 2; i++) {
+        copyInputRefs.add(originalInputRefs.get(i));
+      }
+      RelNode srcRel = null;
+      try {
+        srcRel = HiveProject.create(filterRel, copyInputRefs, null);
+        HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
+        // finally add a project to project out the 1st column
+        Set<Integer> projectOutColumnPositions = new HashSet<>();
+        projectOutColumnPositions.add(0);
+        call.transformTo(HiveCalciteUtil
+            .createProjectWithoutColumn(udtf, projectOutColumnPositions));
+      } catch (SemanticException e) {
+        LOG.debug(e.toString());
+        throw new RuntimeException(e);
+      }
+    }
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 63aa086..e78c8e9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -342,8 +342,8 @@ private Schema getRowSchema(String tblAlias) {
   }
 
   private QueryBlockInfo convertSource(RelNode r)
throws CalciteSemanticException { - Schema s; - ASTNode ast; + Schema s = null; + ASTNode ast = null; if (r instanceof TableScan) { TableScan f = (TableScan) r; @@ -379,19 +379,15 @@ private QueryBlockInfo convertSource(RelNode r) throws CalciteSemanticException s = left.schema; } } else if (r instanceof Union) { - RelNode leftInput = ((Union) r).getInput(0); - RelNode rightInput = ((Union) r).getInput(1); - - ASTConverter leftConv = new ASTConverter(leftInput, this.derivedTableCount); - ASTConverter rightConv = new ASTConverter(rightInput, this.derivedTableCount); - ASTNode leftAST = leftConv.convert(); - ASTNode rightAST = rightConv.convert(); - - ASTNode unionAST = getUnionAllAST(leftAST, rightAST); - - String sqAlias = nextAlias(); - ast = ASTBuilder.subQuery(unionAST, sqAlias); - s = new Schema((Union) r, sqAlias); + Union u = ((Union) r); + ASTNode left = new ASTConverter(((Union) r).getInput(0), this.derivedTableCount).convert(); + for (int ind = 1; ind < u.getInputs().size(); ind++) { + left = getUnionAllAST(left, new ASTConverter(((Union) r).getInput(ind), + this.derivedTableCount).convert()); + String sqAlias = nextAlias(); + ast = ASTBuilder.subQuery(left, sqAlias); + s = new Schema((Union) r, sqAlias); + } } else { ASTConverter src = new ASTConverter(r, this.derivedTableCount); ASTNode srcAST = src.convert(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index d32a0a7..be65e80 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -67,6 +67,7 @@ import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.core.SetOp; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider; @@ -140,8 +141,10 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; @@ -153,6 +156,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregatePullUpConstantsRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExceptRewriteRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; @@ -161,6 +165,8 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSortTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveIntersectMergeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveIntersectRewriteRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule; @@ -196,6 +202,7 @@ import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec; +import org.apache.hadoop.hive.ql.parse.QBExpr.Opcode; import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType; import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec; @@ -1185,6 +1192,32 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT); final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN); + //0. SetOp rewrite + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + basePlan = hepPlan(basePlan, true, mdProvider, null, HepMatchOrder.BOTTOM_UP, + HiveSortLimitPullUpConstantsRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: HiveSortLimitPullUpConstantsRule rule"); + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + basePlan = hepPlan(basePlan, true, mdProvider, null, HepMatchOrder.BOTTOM_UP, + ProjectRemoveRule.INSTANCE, HiveIntersectMergeRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: HiveIntersectMerge rule"); + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.BOTTOM_UP, + HiveIntersectRewriteRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: HiveIntersectRewrite rule"); + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.BOTTOM_UP, + HiveExceptRewriteRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: HiveExceptRewrite rule"); + //1. Distinct aggregate rewrite // Run this optimization early, since it is expanding the operator pipeline. 
     if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
@@ -1238,7 +1271,6 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
     rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN);
     rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
     rules.add(HiveSortMergeRule.INSTANCE);
-    rules.add(HiveSortLimitPullUpConstantsRule.INSTANCE);
     rules.add(HiveUnionPullUpConstantsRule.INSTANCE);
     rules.add(HiveAggregatePullUpConstantsRule.INSTANCE);
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
@@ -1375,18 +1407,16 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
   }
 
   @SuppressWarnings("nls")
-  private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel,
+  private RelNode genSetOpLogicalPlan(Opcode opcode, String alias, String leftalias, RelNode leftRel,
       String rightalias, RelNode rightRel) throws SemanticException {
-    HiveUnion unionRel = null;
-
     // 1. Get Row Resolvers, Column map for original left and right input of
-    // Union Rel
+    // SetOp Rel
     RowResolver leftRR = this.relToHiveRR.get(leftRel);
     RowResolver rightRR = this.relToHiveRR.get(rightRel);
     HashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
     HashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
 
-    // 2. Validate that Union is feasible according to Hive (by using type
+    // 2. Validate that SetOp is feasible according to Hive (by using type
     // info from RR)
     if (leftmap.size() != rightmap.size()) {
      throw new SemanticException("Schema of both sides of union should match.");
     }
@@ -1395,8 +1425,8 @@ private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode
     ASTNode tabref = getQB().getAliases().isEmpty() ? null : getQB().getParseInfo()
         .getSrcForAlias(getQB().getAliases().get(0));
 
-    // 3. construct Union Output RR using original left & right Input
-    RowResolver unionoutRR = new RowResolver();
+    // 3. construct SetOp Output RR using original left & right Input
+    RowResolver setOpOutRR = new RowResolver();
 
     Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator();
     Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator();
@@ -1412,18 +1442,18 @@ private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode
           rInfo.getType());
       if (commonTypeInfo == null) {
         throw new SemanticException(generateErrorMessage(tabref,
-            "Schema of both sides of union should match: Column " + field
+            "Schema of both sides of setop should match: Column " + field
                 + " is of type " + lInfo.getType().getTypeName()
                 + " on first table and type " + rInfo.getType().getTypeName()
                 + " on second table"));
       }
-      ColumnInfo unionColInfo = new ColumnInfo(lInfo);
-      unionColInfo.setType(commonTypeInfo);
-      unionoutRR.put(unionalias, field, unionColInfo);
+      ColumnInfo setOpColInfo = new ColumnInfo(lInfo);
+      setOpColInfo.setType(commonTypeInfo);
+      setOpOutRR.put(alias, field, setOpColInfo);
     }
 
     // 4. Determine which columns requires cast on left/right input (Calcite
-    // requires exact types on both sides of union)
+    // requires exact types on both sides of SetOp)
     boolean leftNeedsTypeCast = false;
     boolean rightNeedsTypeCast = false;
     List<RexNode> leftProjs = new ArrayList<RexNode>();
@@ -1438,7 +1468,7 @@ private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode
       leftFieldDT = leftRowDT.get(i).getType();
       rightFieldDT = rightRowDT.get(i).getType();
       if (!leftFieldDT.equals(rightFieldDT)) {
-        unionFieldDT = TypeConverter.convert(unionoutRR.getColumnInfos().get(i).getType(),
+        unionFieldDT = TypeConverter.convert(setOpOutRR.getColumnInfos().get(i).getType(),
            cluster.getTypeFactory());
        if (!unionFieldDT.equals(leftFieldDT)) {
          leftNeedsTypeCast = true;
@@ -1461,28 +1491,49 @@ private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode
 
     // 5. Introduce Project Rel above original left/right inputs if cast is
     // needed for type parity
-    RelNode unionLeftInput = leftRel;
-    RelNode unionRightInput = rightRel;
+    RelNode setOpLeftInput = leftRel;
+    RelNode setOpRightInput = rightRel;
     if (leftNeedsTypeCast) {
-      unionLeftInput = HiveProject.create(leftRel, leftProjs, leftRel.getRowType()
+      setOpLeftInput = HiveProject.create(leftRel, leftProjs, leftRel.getRowType()
           .getFieldNames());
     }
     if (rightNeedsTypeCast) {
-      unionRightInput = HiveProject.create(rightRel, rightProjs, rightRel.getRowType()
+      setOpRightInput = HiveProject.create(rightRel, rightProjs, rightRel.getRowType()
          .getFieldNames());
     }
 
-    // 6. Construct Union Rel
+    // 6. Construct SetOp Rel
     Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
-    bldr.add(unionLeftInput);
-    bldr.add(unionRightInput);
-    unionRel = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
-
-    relToHiveRR.put(unionRel, unionoutRR);
-    relToHiveColNameCalcitePosMap.put(unionRel,
-        this.buildHiveToCalciteColumnMap(unionoutRR, unionRel));
-
-    return unionRel;
+    bldr.add(setOpLeftInput);
+    bldr.add(setOpRightInput);
+    SetOp setOpRel = null;
+    switch (opcode) {
+    case UNION:
+      setOpRel = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
+      break;
+    case INTERSECT:
+      setOpRel = new HiveIntersect(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build(),
+          false);
+      break;
+    case INTERSECTALL:
+      setOpRel = new HiveIntersect(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build(),
+          true);
+      break;
+    case EXCEPT:
+      setOpRel = new HiveExcept(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build(),
+          false);
+      break;
+    case EXCEPTALL:
+      setOpRel = new HiveExcept(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build(),
+          true);
+      break;
+    default:
+      throw new SemanticException(ErrorMsg.UNSUPPORTED_SET_OPERATOR.getMsg(opcode.toString()));
+    }
+    relToHiveRR.put(setOpRel, setOpOutRR);
+    relToHiveColNameCalcitePosMap.put(setOpRel,
+        this.buildHiveToCalciteColumnMap(setOpOutRR, setOpRel));
+    return setOpRel;
   }
 
   private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJoinType,
@@ -2077,9 +2128,9 @@ private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map
    */
   private class AggInfo {
     private final List<ExprNodeDesc> m_aggParams;
-    private final TypeInfo m_returnType;
-    private final String m_udfName;
-    private final boolean m_distinct;
+    private final TypeInfo m_returnType;
+    private final String m_udfName;
+    private final boolean m_distinct;
 
     private AggInfo(List<ExprNodeDesc> aggParams, TypeInfo returnType, String udfName,
         boolean isDistinct) {
@@ -2089,7 +2140,7 @@ private AggInfo(List<ExprNodeDesc> aggParams,
TypeInfo returnType, String udfNam m_distinct = isDistinct; } } - + private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List gbChildProjLst, RexNodeConverter converter, HashMap rexNodeToPosMap, Integer childProjLstIndx) throws SemanticException { @@ -3349,17 +3400,21 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str } private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { - if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { + switch (qbexpr.getOpcode()) { + case NULLOP: return genLogicalPlan(qbexpr.getQB(), false); - } - if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) { + case UNION: + case INTERSECT: + case INTERSECTALL: + case EXCEPT: + case EXCEPTALL: RelNode qbexpr1Ops = genLogicalPlan(qbexpr.getQBExpr1()); RelNode qbexpr2Ops = genLogicalPlan(qbexpr.getQBExpr2()); - - return genUnionLogicalPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(), qbexpr1Ops, - qbexpr.getQBExpr2().getAlias(), qbexpr2Ops); + return genSetOpLogicalPlan(qbexpr.getOpcode(), qbexpr.getAlias(), qbexpr.getQBExpr1() + .getAlias(), qbexpr1Ops, qbexpr.getQBExpr2().getAlias(), qbexpr2Ops); + default: + return null; } - return null; } private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 5d3fa6a..17985d2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -96,6 +96,7 @@ KW_CLUSTER: 'CLUSTER'; KW_DISTRIBUTE: 'DISTRIBUTE'; KW_SORT: 'SORT'; KW_UNION: 'UNION'; +KW_EXCEPT: 'EXCEPT'; KW_LOAD: 'LOAD'; KW_EXPORT: 'EXPORT'; KW_IMPORT: 'IMPORT'; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 5c16c55..7bf02bb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -88,6 +88,10 @@ TOK_DISTRIBUTEBY; TOK_SORTBY; TOK_UNIONALL; TOK_UNIONDISTINCT; +TOK_INTERSECTALL; +TOK_INTERSECTDISTINCT; +TOK_EXCEPTALL; +TOK_EXCEPTDISTINCT; TOK_JOIN; TOK_LEFTOUTERJOIN; TOK_RIGHTOUTERJOIN; @@ -449,6 +453,8 @@ import org.apache.hadoop.hive.conf.HiveConf; xlateMap.put("KW_DISTRIBUTE", "DISTRIBUTE"); xlateMap.put("KW_SORT", "SORT"); xlateMap.put("KW_UNION", "UNION"); + xlateMap.put("KW_INTERSECT", "INTERSECT"); + xlateMap.put("KW_EXCEPT", "EXCEPT"); xlateMap.put("KW_LOAD", "LOAD"); xlateMap.put("KW_DATA", "DATA"); xlateMap.put("KW_INPATH", "INPATH"); @@ -2302,6 +2308,12 @@ setOperator @after { popMsg(state); } : KW_UNION KW_ALL -> ^(TOK_UNIONALL) | KW_UNION KW_DISTINCT? -> ^(TOK_UNIONDISTINCT) + | KW_INTERSECT KW_ALL -> ^(TOK_INTERSECTALL) + | KW_INTERSECT KW_DISTINCT -> ^(TOK_INTERSECTDISTINCT) + | KW_EXCEPT KW_ALL -> ^(TOK_EXCEPTALL) + | KW_EXCEPT KW_DISTINCT -> ^(TOK_EXCEPTDISTINCT) + | KW_MINUS KW_ALL -> ^(TOK_EXCEPTALL) + | KW_MINUS KW_DISTINCT -> ^(TOK_EXCEPTDISTINCT) ; queryStatementExpression @@ -2457,7 +2469,7 @@ setOpSelectStatement[CommonTree t] ) ) -> {$setOpSelectStatement.tree != null && u.tree.getType()!=HiveParser.TOK_UNIONDISTINCT}? - ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b) + ^($u {$setOpSelectStatement.tree} $b) -> {$setOpSelectStatement.tree == null && u.tree.getType()==HiveParser.TOK_UNIONDISTINCT}? 
^(TOK_QUERY ^(TOK_FROM @@ -2471,9 +2483,13 @@ setOpSelectStatement[CommonTree t] ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF)) ) ) - -> ^(TOK_UNIONALL {$t} $b) + -> ^($u {$t} $b) )+ - -> {$setOpSelectStatement.tree.getChild(0).getType()==HiveParser.TOK_UNIONALL}? + -> {$setOpSelectStatement.tree.getChild(0).getType()==HiveParser.TOK_UNIONALL + ||$setOpSelectStatement.tree.getChild(0).getType()==HiveParser.TOK_INTERSECTDISTINCT + ||$setOpSelectStatement.tree.getChild(0).getType()==HiveParser.TOK_INTERSECTALL + ||$setOpSelectStatement.tree.getChild(0).getType()==HiveParser.TOK_EXCEPTDISTINCT + ||$setOpSelectStatement.tree.getChild(0).getType()==HiveParser.TOK_EXCEPTALL}? ^(TOK_QUERY ^(TOK_FROM ^(TOK_SUBQUERY diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 50987c3..f79960a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -698,7 +698,7 @@ nonReserved | KW_FIRST | KW_FORMAT | KW_FORMATTED | KW_FUNCTIONS | KW_HOLD_DDLTIME | KW_HOUR | KW_IDXPROPERTIES | KW_IGNORE | KW_INDEX | KW_INDEXES | KW_INPATH | KW_INPUTDRIVER | KW_INPUTFORMAT | KW_ITEMS | KW_JAR | KW_KEYS | KW_KEY_TYPE | KW_LAST | KW_LIMIT | KW_OFFSET | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG - | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_NULLS | KW_OFFLINE + | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_NULLS | KW_OFFLINE | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_QUARTER | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java index cccf0f6..7601509 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java @@ -35,7 +35,7 @@ * */ public static enum Opcode { - NULLOP, UNION, INTERSECT, DIFF + NULLOP, UNION, INTERSECT, INTERSECTALL, EXCEPT, EXCEPTALL, DIFF }; private Opcode opcode; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 9d58193..1c1bde1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -484,8 +484,7 @@ public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, throws SemanticException { assert (ast.getToken() != null); - switch (ast.getToken().getType()) { - case HiveParser.TOK_QUERY: { + if (ast.getToken().getType() == HiveParser.TOK_QUERY) { QB qb = new QB(id, alias, true); qb.setInsideView(insideView); Phase1Ctx ctx_1 = initPhase1Ctx(); @@ -494,25 +493,41 @@ public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, qbexpr.setOpcode(QBExpr.Opcode.NULLOP); qbexpr.setQB(qb); } - break; - case HiveParser.TOK_UNIONALL: { - qbexpr.setOpcode(QBExpr.Opcode.UNION); + // setop + else { + switch (ast.getToken().getType()) { + case HiveParser.TOK_UNIONALL: + 
qbexpr.setOpcode(QBExpr.Opcode.UNION); + break; + case HiveParser.TOK_INTERSECTALL: + qbexpr.setOpcode(QBExpr.Opcode.INTERSECTALL); + break; + case HiveParser.TOK_INTERSECTDISTINCT: + qbexpr.setOpcode(QBExpr.Opcode.INTERSECT); + break; + case HiveParser.TOK_EXCEPTALL: + qbexpr.setOpcode(QBExpr.Opcode.EXCEPTALL); + break; + case HiveParser.TOK_EXCEPTDISTINCT: + qbexpr.setOpcode(QBExpr.Opcode.EXCEPT); + break; + default: + throw new SemanticException("Unsupported set operator."); + } // query 1 assert (ast.getChild(0) != null); QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1); - doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + SUBQUERY_TAG_1, - alias + SUBQUERY_TAG_1, insideView); + doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + SUBQUERY_TAG_1, alias + + SUBQUERY_TAG_1, insideView); qbexpr.setQBExpr1(qbexpr1); // query 2 assert (ast.getChild(1) != null); QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2); - doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + SUBQUERY_TAG_2, - alias + SUBQUERY_TAG_2, insideView); + doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + SUBQUERY_TAG_2, alias + + SUBQUERY_TAG_2, insideView); qbexpr.setQBExpr2(qbexpr2); } - break; - } } private LinkedHashMap doPhase1GetAggregationsFromSelect( diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java index a427803..0dc6b19 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java @@ -30,7 +30,7 @@ /** * Parser tests for SQL11 Reserved KeyWords. Please find more information in - * HIVE-6617. Total number : 81 + * HIVE-6617. 
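
Because KW_MINUS is dropped from the nonReserved rule above, MINUS becomes a reserved word (as INTERSECT already was), which the updated test below asserts. As a hedged illustration, not part of the patch, an existing identifier with that name would now need quoting:

  create table MINUS (col string);    -- now a ParseException: MINUS is reserved
  create table `MINUS` (col string);  -- backquoted identifiers should still work
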
Total number : 82 * ALL,ALTER,ARRAY,AS,AUTHORIZATION,BETWEEN,BIGINT,BINARY * ,BOOLEAN,BOTH,BY,CONSTRAINT * ,CREATE,CUBE,CURRENT_DATE,CURRENT_TIMESTAMP,CURSOR, @@ -38,7 +38,7 @@ * ,DOUBLE,DROP,EXISTS,EXTERNAL,FALSE,FETCH,FLOAT,FOR * ,FOREIGN,FULL,GRANT,GROUP,GROUPING * ,IMPORT,IN,INNER,INSERT,INT,INTERSECT,INTO,IS - * ,LATERAL,LEFT,LIKE,LOCAL,NONE,NULL + * ,LATERAL,LEFT,LIKE,LOCAL,MINUS,NONE,NULL * ,OF,ORDER,OUT,OUTER,PARTITION,PERCENT,PRECISION * ,PRIMARY,PROCEDURE,RANGE,READS, * REFERENCES,REGEXP,REVOKE,RIGHT,RLIKE,ROLLUP,ROW @@ -670,19 +670,32 @@ public void testSQL11ReservedKeyWords_LOCAL() { } @Test - public void testSQL11ReservedKeyWords_NONE() { + public void testSQL11ReservedKeyWords_MINUS() { try { - parse("CREATE TABLE NONE (col STRING)"); + parse("CREATE TABLE MINUS (col STRING)"); Assert.assertFalse("Expected ParseException", true); } catch (ParseException ex) { Assert.assertEquals( "Failure didn't match.", - "line 1:13 cannot recognize input near 'NONE' '(' 'col' in table name", + "line 1:13 cannot recognize input near 'MINUS' '(' 'col' in table name", ex.getMessage()); } } @Test + public void testSQL11ReservedKeyWords_NONE() { + try { + parse("CREATE TABLE NONE (col STRING)"); + Assert.assertFalse("Expected ParseException", true); + } catch (ParseException ex) { + Assert.assertEquals( + "Failure didn't match.", + "line 1:13 cannot recognize input near 'NONE' '(' 'col' in table name", + ex.getMessage()); + } + } + + @Test public void testSQL11ReservedKeyWords_NULL() { try { parse("CREATE TABLE NULL (col STRING)"); diff --git a/ql/src/test/queries/clientpositive/except_all.q b/ql/src/test/queries/clientpositive/except_all.q new file mode 100644 index 0000000..3b62459 --- /dev/null +++ b/ql/src/test/queries/clientpositive/except_all.q @@ -0,0 +1,58 @@ +set hive.mapred.mode=nonstrict; +set hive.cbo.enable=true; + +create table a(key int); + +insert into table a values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL); + +create table b(key bigint); + +insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL); + +select * from a except all select * from b; + +drop table a; + +drop table b; + +create table a(key int, value int); + +insert into table a values (1,2),(1,2),(1,3),(2,3),(2,2); + +create table b(key int, value int); + +insert into table b values (1,2),(2,3),(2,2),(2,2),(2,20); + +select * from a except all select * from b; + +select * from b except all select * from a; + +select * from b except all select * from a intersect distinct select * from b; + +select * from b except all select * from a except distinct select * from b; + +select * from a except all select * from b union all select * from a except distinct select * from b; + +select * from a except all select * from b union select * from a except distinct select * from b; + +select * from a except all select * from b except distinct select * from a except distinct select * from b; + +select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except all +select * from (select a.key, b.value from a join b on a.key=b.key)sub2; + +select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except all +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2; + +explain select * from src except all select * from src; + +select * from src except all select * from src; + +explain select * from src except all select * from src except distinct select * from src except distinct select * from src; + +select * from src except all select * from src except distinct 
select * from src except distinct select * from src; + +explain select value from a group by value except distinct select key from b group by key; + +select value from a group by value except distinct select key from b group by key; diff --git a/ql/src/test/queries/clientpositive/except_distinct.q b/ql/src/test/queries/clientpositive/except_distinct.q new file mode 100644 index 0000000..9991447 --- /dev/null +++ b/ql/src/test/queries/clientpositive/except_distinct.q @@ -0,0 +1,58 @@ +set hive.mapred.mode=nonstrict; +set hive.cbo.enable=true; + +create table a(key int); + +insert into table a values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL); + +create table b(key bigint); + +insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL); + +select * from a except distinct select * from b; + +drop table a; + +drop table b; + +create table a(key int, value int); + +insert into table a values (1,2),(1,2),(1,3),(2,3),(2,2); + +create table b(key int, value int); + +insert into table b values (1,2),(2,3),(2,2),(2,2),(2,20); + +select * from a except distinct select * from b; + +select * from b except distinct select * from a; + +select * from b except distinct select * from a intersect distinct select * from b; + +select * from b except distinct select * from a except distinct select * from b; + +select * from a except distinct select * from b union all select * from a except distinct select * from b; + +select * from a except distinct select * from b union select * from a except distinct select * from b; + +select * from a except distinct select * from b except distinct select * from a except distinct select * from b; + +select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except distinct +select * from (select a.key, b.value from a join b on a.key=b.key)sub2; + +select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except distinct +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2; + +explain select * from src except distinct select * from src; + +select * from src except distinct select * from src; + +explain select * from src except distinct select * from src except distinct select * from src except distinct select * from src; + +select * from src except distinct select * from src except distinct select * from src except distinct select * from src; + +explain select value from a group by value except distinct select key from b group by key; + +select value from a group by value except distinct select key from b group by key; diff --git a/ql/src/test/queries/clientpositive/intersect_all.q b/ql/src/test/queries/clientpositive/intersect_all.q new file mode 100644 index 0000000..a0d4afd --- /dev/null +++ b/ql/src/test/queries/clientpositive/intersect_all.q @@ -0,0 +1,42 @@ +set hive.mapred.mode=nonstrict; +set hive.cbo.enable=true; + +create table a(key int, value int); + +insert into table a values (1,2),(1,2),(1,3),(2,3); + +create table b(key int, value int); + +insert into table b values (1,2),(2,3); + +select key, value, count(1) as c from a group by key, value; + +select * from a intersect all select * from b; + +select * from b intersect all select * from a intersect all select * from b; + +select * from a intersect all select * from b union all select * from a intersect all select * from b; + +select * from a intersect all select * from b union select * from a intersect all select * from b; + +select * from a intersect all select * from b intersect all select * from a intersect all select * from b; + 
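
As a reading aid for these intersect tests: INTERSECT ALL uses multiset semantics, returning a row min(m, n) times when it occurs m times on the left and n times on the right, while INTERSECT DISTINCT returns it once whenever min(m, n) > 0. With the tables just created (a holds (1,2) twice plus (1,3) and (2,3); b holds (1,2) and (2,3)) the expected results are sketched below. Note also that the except tests above intentionally pair an int key with a bigint key, exercising the cast-projection path added to the planner, and intersect_merge.q further on probes how chained and parenthesized INTERSECTs are planned.

  select * from a intersect all select * from b;
  -- expected: (1,2) once, i.e. min(2,1), and (2,3) once, i.e. min(1,1)
  select * from a intersect distinct select * from b;
  -- expected: (1,2) and (2,3), each exactly once
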
+select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect all +select * from (select a.key, b.value from a join b on a.key=b.key)sub2; + +select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect all +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2; + +explain select * from src intersect all select * from src; + +select * from src intersect all select * from src; + +explain select * from src intersect all select * from src intersect all select * from src intersect all select * from src; + +select * from src intersect all select * from src intersect all select * from src intersect all select * from src; + +explain select value from a group by value intersect all select key from b group by key; + +select value from a group by value intersect all select key from b group by key; diff --git a/ql/src/test/queries/clientpositive/intersect_distinct.q b/ql/src/test/queries/clientpositive/intersect_distinct.q new file mode 100644 index 0000000..aa8155a --- /dev/null +++ b/ql/src/test/queries/clientpositive/intersect_distinct.q @@ -0,0 +1,42 @@ +set hive.mapred.mode=nonstrict; +set hive.cbo.enable=true; + +create table a(key int, value int); + +insert into table a values (1,2),(1,2),(1,3),(2,3); + +create table b(key int, value int); + +insert into table b values (1,2),(2,3); + +select key, count(1) as c from a group by key intersect all select value, max(key) as c from b group by value; + +select * from a intersect distinct select * from b; + +select * from b intersect distinct select * from a intersect distinct select * from b; + +select * from a intersect distinct select * from b union all select * from a intersect distinct select * from b; + +select * from a intersect distinct select * from b union select * from a intersect distinct select * from b; + +select * from a intersect distinct select * from b intersect distinct select * from a intersect distinct select * from b; + +select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect distinct +select * from (select a.key, b.value from a join b on a.key=b.key)sub2; + +select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect distinct +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2; + +explain select * from src intersect distinct select * from src; + +select * from src intersect distinct select * from src; + +explain select * from src intersect distinct select * from src intersect distinct select * from src intersect distinct select * from src; + +select * from src intersect distinct select * from src intersect distinct select * from src intersect distinct select * from src; + +explain select value from a group by value intersect distinct select key from b group by key; + +select value from a group by value intersect distinct select key from b group by key; diff --git a/ql/src/test/queries/clientpositive/intersect_merge.q b/ql/src/test/queries/clientpositive/intersect_merge.q new file mode 100644 index 0000000..0d8789e --- /dev/null +++ b/ql/src/test/queries/clientpositive/intersect_merge.q @@ -0,0 +1,27 @@ +set hive.mapred.mode=nonstrict; +set hive.cbo.enable=true; + +create table a(key int, value int); + +insert into table a values (1,2),(1,2),(1,3),(2,3); + +create table b(key int, value int); + +insert into table b values (1,2),(2,3); + +explain select * from b intersect distinct select * from a intersect distinct select * from b intersect distinct select * from a intersect 
distinct select * from b; + +explain (select * from b intersect distinct select * from a) intersect distinct (select * from b intersect distinct select * from a); + +explain select * from b intersect distinct (select * from a intersect distinct (select * from b intersect distinct (select * from a intersect distinct select * from b))); + +explain (((select * from b intersect distinct select * from a) intersect distinct select * from b) intersect distinct select * from a) intersect distinct select * from b; + +explain select * from b intersect distinct (select * from a intersect distinct select * from b) intersect distinct select * from a intersect distinct select * from b; + +explain select * from b intersect distinct (select * from a intersect all select * from b); + +explain select * from b intersect all (select * from a intersect all select * from b); + +explain select * from b intersect all (select * from a intersect distinct select * from b); + diff --git a/ql/src/test/results/clientpositive/except_all.q.out b/ql/src/test/results/clientpositive/except_all.q.out new file mode 100644 index 0000000..17313c1 --- /dev/null +++ b/ql/src/test/results/clientpositive/except_all.q.out @@ -0,0 +1,986 @@ +PREHOOK: query: create table a(key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: insert into table a values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table b(key bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(key bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from a except all select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a except all select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +0 +2 +2 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a +PREHOOK: Output: default@a +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@a +POSTHOOK: Output: default@a +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b +PREHOOK: Output: default@b +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +POSTHOOK: 
Input: default@b +POSTHOOK: Output: default@b +PREHOOK: query: create table a(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3),(2,2) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3),(2,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: a.value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: create table b(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table b values (1,2),(2,3),(2,2),(2,2),(2,20) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values (1,2),(2,3),(2,2),(2,2),(2,20) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: b.value EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select * from a except all select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a except all select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +1 3 +PREHOOK: query: select * from b except all select * from a +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from b except all select * from a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +2 2 +2 20 +PREHOOK: query: select * from b except all select * from a intersect distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from b except all select * from a intersect distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +2 2 +2 20 +PREHOOK: query: select * from b except all select * from a except distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from b except all select * from a except distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +PREHOOK: query: select * from a except all select * from b union all select * from a except 
distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a except all select * from b union all select * from a except distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 3 +PREHOOK: query: select * from a except all select * from b union select * from a except distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a except all select * from b union select * from a except distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 3 +PREHOOK: query: select * from a except all select * from b except distinct select * from a except distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a except all select * from b except distinct select * from a except distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +PREHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except all +select * from (select a.key, b.value from a join b on a.key=b.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except all +select * from (select a.key, b.value from a join b on a.key=b.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +PREHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except all +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except all +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +1 2 +1 2 +2 3 +2 3 +2 20 +2 20 +PREHOOK: query: explain select * from src except all select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src except all select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), 
_col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 * _col3) (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col4, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + TableScan + Union + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 * _col3) (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col4, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ((2 * _col2) - (3 * _col3)) (type: bigint), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + UDTF 
Operator + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + function name: UDTFReplicateRows + Select Operator + expressions: col1 (type: string), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src except all select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src except all select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain select * from src except all select * from src except distinct select * from src except distinct select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src except all select * from src except distinct select * from src except distinct select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3, Stage-7 + Stage-5 depends on stages: Stage-4, Stage-8 + Stage-6 is a root stage + Stage-7 is a root stage + Stage-8 is a root stage + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 * _col3) (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col4, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + TableScan + Union + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 * _col3) (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col4, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + 
aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ((2 * _col2) - (3 * _col3)) (type: bigint), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + function name: UDTFReplicateRows + Select Operator + expressions: col1 (type: string), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + TableScan + Union + Statistics: Num rows: 375 Data 
size: 3984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 187 Data size: 1986 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col2 > 0) and ((_col2 * 2) = _col3)) (type: boolean) + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(2) + keys: _col0 (type: string), _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 159 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 159 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 265 Data size: 2815 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 265 Data size: 2815 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 265 Data size: 2815 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 265 Data size: 2815 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + TableScan + Union + Statistics: Num rows: 265 Data size: 2815 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + 
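
The arithmetic threaded through these plans is a counting rewrite of the set operation; the sketch below restates it, where a and b denote the multiplicities of one distinct row on the left and right inputs (they are not the test tables):

  -- Each branch tags its groups with a constant marker (2 for the left input,
  -- 1 for the right) plus a count, so after the union the aggregate holds
  --   sum(count)          = a + b
  --   sum(marker * count) = 2a + b
  -- EXCEPT ALL replicates each row (2*(2a + b) - 3*(a + b)) = (a - b) times via
  -- the replicate_rows UDTF, so rows with a <= b produce no output.
  -- EXCEPT DISTINCT keeps a row when sum(count) > 0 and
  -- 2*sum(count) = sum(marker * count), i.e. 2(a + b) = 2a + b, which forces
  -- b = 0 and a > 0: present on the left, absent on the right.
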
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 265 Data size: 2815 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 265 Data size: 2815 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 265 Data size: 2815 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 132 Data size: 1402 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col2 > 0) and ((_col2 * 2) = _col3)) (type: boolean) + Statistics: Num rows: 22 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 233 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 233 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src except all select * from src except distinct select * from src except distinct select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: 
select * from src except all select * from src except distinct select * from src except distinct select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain select value from a group by value except distinct select key from b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from a group by value except distinct select key from b group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: int) + outputColumnNames: value + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(2) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 2 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col2) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + TableScan + Union + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col2) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num 
rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 > 0) and ((_col1 * 2) = _col2)) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 1 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select value from a group by value except distinct select key from b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select value from a group by value except distinct select key from b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +3 diff --git a/ql/src/test/results/clientpositive/llap/except_distinct.q.out b/ql/src/test/results/clientpositive/llap/except_distinct.q.out new file mode 100644 index 0000000..5a19350 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/llap/except_distinct.q.out @@ -0,0 +1,894 @@ +PREHOOK: query: create table a(key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: insert into table a values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table b(key bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(key bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from a except distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a except distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +0 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a +PREHOOK: Output: default@a +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@a +POSTHOOK: Output: default@a +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b +PREHOOK: Output: default@b +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@b +POSTHOOK: Output: default@b +PREHOOK: query: create table a(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3),(2,2) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3),(2,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: a.value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: create table b(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: 
query: create table b(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table b values (1,2),(2,3),(2,2),(2,2),(2,20) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values (1,2),(2,3),(2,2),(2,2),(2,20) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: b.value EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select * from a except distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a except distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 3 +PREHOOK: query: select * from b except distinct select * from a +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from b except distinct select * from a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +2 20 +PREHOOK: query: select * from b except distinct select * from a intersect distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from b except distinct select * from a intersect distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +2 20 +PREHOOK: query: select * from b except distinct select * from a except distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from b except distinct select * from a except distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +PREHOOK: query: select * from a except distinct select * from b union all select * from a except distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a except distinct select * from b union all select * from a except distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 3 +PREHOOK: query: select * from a except distinct select * from b union select * from a except distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a except distinct select * from b union select * from a except distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 3 +PREHOOK: query: select * from a except distinct select * from b except distinct select * from a except distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here 
#### +POSTHOOK: query: select * from a except distinct select * from b except distinct select * from a except distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +PREHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except distinct +select * from (select a.key, b.value from a join b on a.key=b.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except distinct +select * from (select a.key, b.value from a join b on a.key=b.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +PREHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except distinct +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +except distinct +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +2 20 +PREHOOK: query: explain select * from src except distinct select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src except distinct select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + 
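
The Tez plan being quoted here (explain select * from src except distinct select * from src) shows the shape Hive generates for EXCEPT DISTINCT: each branch is grouped on the full select list with a per-group count (the left branch counts with count(2), the right with count(1) — counting any non-null constant yields the group size, and the same 2/1 constants then reappear as branch tags in Reducers 2 and 6), the tagged branches are combined with UNION ALL, and a final aggregate plus filter keeps only keys that occur on the left and never on the right. A hand-written HiveQL equivalent, as a sketch only — the optimizer builds this shape internally, and the aliases w, cnt, s, ws below are invented for illustration:

    SELECT key, value
    FROM (
      SELECT key, value, SUM(cnt) AS s, SUM(w * cnt) AS ws
      FROM (
        SELECT key, value, 2 AS w, COUNT(1) AS cnt FROM src GROUP BY key, value
        UNION ALL
        SELECT key, value, 1 AS w, COUNT(1) AS cnt FROM src GROUP BY key, value
      ) u
      GROUP BY key, value
    ) t
    WHERE s > 0 AND s * 2 = ws;

With a as a key's per-group count on the left and b on the right, s = a + b and ws = 2a + b, so s * 2 = ws forces b = 0 while s > 0 forces a > 0 — exactly EXCEPT DISTINCT semantics. That is why the predicate later in this plan (Reducer 4) reads ((_col2 > 0) and ((_col2 * 2) = _col3)), and why the single-column variants use ((_col1 > 0) and ((_col1 * 2) = _col2)).
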
Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col2 > 0) and ((_col2 * 2) = _col3)) (type: boolean) + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) 
(type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src except distinct select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src except distinct select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain select * from src except distinct select * from src except distinct select * from src except distinct select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src except distinct select * from src except distinct select * from src except distinct select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 14 <- Map 13 (SIMPLE_EDGE), Union 7 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) + Reducer 8 <- Union 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 11 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 
250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 13 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 291 Data size: 56454 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 145 Data size: 28130 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 145 Data size: 28130 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 274 Data size: 53156 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 137 Data size: 26578 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 26578 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, 
_col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col2 > 0) and ((_col2 * 2) = _col3)) (type: boolean) + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(2) + keys: _col0 (type: string), _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 41 Data size: 7626 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 291 Data size: 56454 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 145 Data size: 28130 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 145 Data size: 28130 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 145 Data size: 28130 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col2 > 0) and ((_col2 * 2) = _col3)) (type: boolean) + Statistics: Num rows: 24 Data size: 4656 Basic stats: COMPLETE Column stats: 
COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 24 Data size: 4656 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(2) + keys: _col0 (type: string), _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 4464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 4656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 274 Data size: 53156 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 137 Data size: 26578 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 26578 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 137 Data size: 26578 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col2 > 0) and ((_col2 * 2) = _col3)) (type: boolean) + Statistics: Num rows: 22 Data size: 4268 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 + Union 5 + Vertex: Union 5 + Union 7 + Vertex: Union 7 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src except distinct select * from src except distinct select * from src except distinct select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src except distinct select * from src except distinct select * from src except distinct select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain select value from a group by value except distinct select key from b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from a group by value except distinct select key from b group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + 
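
The four-way chained EXCEPT DISTINCT plan just shown cascades through Union 3/5/7 because the chain is evaluated left-associatively: each intermediate reducer (Reducers 4 and 6) applies the same count filter, then re-tags the surviving rows with the constant 2 so they become the left input of the next EXCEPT stage. The grouping, written out as an illustrative sketch (pseudo-SQL, not runnable as written):

    ((src EXCEPT DISTINCT src) EXCEPT DISTINCT src) EXCEPT DISTINCT src
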
Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: int) + outputColumnNames: value + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(2) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 2 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col2) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 > 0) and ((_col1 * 2) = _col2)) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 1 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col2) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select value from a group by value except distinct select key from b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select value from a group by value except distinct select key from b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +3 diff --git a/ql/src/test/results/clientpositive/llap/intersect_all.q.out b/ql/src/test/results/clientpositive/llap/intersect_all.q.out new file mode 100644 index 0000000..6d8b99d --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/intersect_all.q.out @@ -0,0 +1,1697 @@ +PREHOOK: query: create table a(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: a.value EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: create table b(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table b values (1,2),(2,3) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values (1,2),(2,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: b.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select key, value, count(1) as c from a group by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select key, value, count(1) as c from a group by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +1 2 2 +1 3 1 +2 3 1 +PREHOOK: query: select * from a intersect all select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a intersect all select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from b intersect all select * from a intersect all select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from b intersect all select * from a intersect all select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from a intersect all select * from b union all select * from a intersect all select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a intersect all select * from b union all select * from a intersect all select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from a intersect all select * from b union select * from a intersect all select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a intersect all select * from b union select * from a intersect all select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from a intersect all select * from b intersect all select * from a intersect all select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a intersect all select * from b intersect all select * from a intersect all select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here 
#### +1 2 +2 3 +PREHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect all +select * from (select a.key, b.value from a join b on a.key=b.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect all +select * from (select a.key, b.value from a join b on a.key=b.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +1 2 +1 2 +2 3 +PREHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect all +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect all +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +PREHOOK: query: explain select * from src intersect all select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src intersect all select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE 
Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col2), count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col3 = 2) (type: boolean) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: bigint), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + function name: UDTFReplicateRows + Select Operator + expressions: col1 (type: string), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col2), count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src intersect all select * from src +PREHOOK: type: QUERY +PREHOOK: Input: 
default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src intersect all select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +33 
val_33 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +44 val_44 +443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +458 val_458 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 +478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +67 val_67 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: explain select * from src intersect all select * from src intersect all select * from src intersect all select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src intersect all select * from src intersect all select * from src intersect all select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), 
Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data 
size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col2), count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col2), count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col3 = 4) (type: boolean) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: bigint), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + function name: UDTFReplicateRows + Select Operator + expressions: col1 (type: string), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col2), count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col2), count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src intersect all select * from src intersect all select * from src intersect all select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src intersect all select * from src intersect all select * from src intersect all select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 
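
The four-way INTERSECT ALL plan above makes the rewrite strategy visible: each branch is grouped on the full select list with a count(1) (Maps 1, 5, 7, 9), the grouped branches are unioned, a second aggregation computes min(count) and count(count) per key (Reducers 2, 6, 8, 10), a filter keeps only keys that occurred in every branch (_col3 = 4, one per input), and UDTFReplicateRows re-emits each surviving key min(count) times to restore INTERSECT ALL multiplicity. A rough hand-written HiveQL equivalent of the two-branch case is sketched below; invoking replicate_rows directly in the SELECT clause is an assumption made for illustration, not something taken from this patch.

  -- sketch only: manual rewrite of `select * from src intersect all select * from src`
  SELECT replicate_rows(m.min_cnt, m.key, m.value) AS (key, value)
  FROM (
    SELECT key, value,
           min(cnt)   AS min_cnt,     -- smallest per-branch multiplicity
           count(cnt) AS branch_cnt   -- number of branches containing the row
    FROM (
      SELECT key, value, count(1) AS cnt FROM src GROUP BY key, value
      UNION ALL
      SELECT key, value, count(1) AS cnt FROM src GROUP BY key, value
    ) branches
    GROUP BY key, value
    HAVING count(cnt) = 2             -- 2 = number of INTERSECT ALL branches
  ) m;
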
val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 
val_419 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +44 val_44 +443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +458 val_458 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 +478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +67 val_67 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: explain select value from a group by value intersect all select key from b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from a group by value intersect all select key from b group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: int) + outputColumnNames: value + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1), count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 = 2) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + function name: UDTFReplicateRows + Select Operator + expressions: col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1), count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select value from a group by value intersect all select key from b group by key +PREHOOK: type: QUERY +PREHOOK: Input: 
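
Both this plan and its INTERSECT DISTINCT twin later in the file return the same single row for the seeded fixtures, because each branch is already a GROUP BY: every per-branch count(1) is exactly 1, so min(count) is 1 and INTERSECT ALL has nothing to replicate. With a(key, value) loaded from (1,2),(1,2),(1,3),(2,3) and b from (1,2),(2,3), as in the sibling q.out setups:

  -- grouped branches carry no duplicates, so these are equivalent here;
  -- both return the single value 2
  SELECT value FROM a GROUP BY value
  INTERSECT ALL
  SELECT key FROM b GROUP BY key;

  SELECT value FROM a GROUP BY value
  INTERSECT DISTINCT
  SELECT key FROM b GROUP BY key;
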
default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select value from a group by value intersect all select key from b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +2 diff --git a/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out b/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out new file mode 100644 index 0000000..69e0b17 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out @@ -0,0 +1,1292 @@ +PREHOOK: query: create table a(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: a.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: create table b(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table b values (1,2),(2,3) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values (1,2),(2,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: b.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select key, count(1) as c from a group by key intersect all select value, max(key) as c from b group by value +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(1) as c from a group by key intersect all select value, max(key) as c from b group by value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +2 1 +PREHOOK: query: select * from a intersect distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a intersect distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from b intersect distinct select * from a intersect distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from b intersect distinct select * from a intersect distinct 
select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from a intersect distinct select * from b union all select * from a intersect distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a intersect distinct select * from b union all select * from a intersect distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from a intersect distinct select * from b union select * from a intersect distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a intersect distinct select * from b union select * from a intersect distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from a intersect distinct select * from b intersect distinct select * from a intersect distinct select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a intersect distinct select * from b intersect distinct select * from a intersect distinct select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect distinct +select * from (select a.key, b.value from a join b on a.key=b.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect distinct +select * from (select a.key, b.value from a join b on a.key=b.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect distinct +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect distinct +select * from (select b.value as key, a.key as value from a join b on a.key=b.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +PREHOOK: query: explain select * from src intersect distinct select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src intersect distinct select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col2 = 2) (type: boolean) + Statistics: Num rows: 1 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src intersect distinct select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src intersect distinct select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +119 val_119 +12 val_12 +120 val_120 +125 val_125 +126 val_126 +128 val_128 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +136 val_136 +137 val_137 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +149 val_149 +15 val_15 +150 val_150 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +165 val_165 +166 val_166 +167 val_167 +168 val_168 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +174 val_174 +175 val_175 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +192 val_192 +193 val_193 +194 val_194 +195 val_195 +196 val_196 +197 val_197 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +205 val_205 +207 val_207 +208 val_208 +209 val_209 +213 val_213 +214 val_214 +216 val_216 +217 val_217 +218 val_218 +219 val_219 +221 val_221 +222 val_222 +223 val_223 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +230 val_230 +233 val_233 +235 val_235 +237 val_237 +238 val_238 +239 val_239 +24 val_24 +241 val_241 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +278 val_278 +28 val_28 +280 val_280 +281 val_281 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +308 val_308 +309 val_309 +310 val_310 +311 val_311 +315 val_315 +316 val_316 +317 val_317 +318 val_318 +321 val_321 +322 val_322 +323 val_323 +325 val_325 +327 val_327 +33 val_33 +331 val_331 +332 val_332 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 
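
The INTERSECT DISTINCT plan above is the simpler sibling of the INTERSECT ALL rewrite: the per-branch GROUP BY + count(1) and the union are identical, but the second aggregation only needs count(_col2) to count contributing branches, the filter keeps keys seen in both inputs (_col2 = 2), and no replicate_rows step appears because each qualifying key is emitted exactly once. A hand-written sketch of the same shape (table names illustrative):

  SELECT key, value
  FROM (
    SELECT key, value, count(cnt) AS branch_cnt
    FROM (
      SELECT key, value, count(1) AS cnt FROM src GROUP BY key, value
      UNION ALL
      SELECT key, value, count(1) AS cnt FROM src GROUP BY key, value
    ) branches
    GROUP BY key, value
    HAVING count(cnt) = 2   -- present in both branches
  ) m;
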
val_339 +34 val_34 +341 val_341 +342 val_342 +344 val_344 +345 val_345 +348 val_348 +35 val_35 +351 val_351 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +368 val_368 +369 val_369 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +396 val_396 +397 val_397 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +402 val_402 +403 val_403 +404 val_404 +406 val_406 +407 val_407 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +414 val_414 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +421 val_421 +424 val_424 +427 val_427 +429 val_429 +43 val_43 +430 val_430 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +439 val_439 +44 val_44 +443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +459 val_459 +460 val_460 +462 val_462 +463 val_463 +466 val_466 +467 val_467 +468 val_468 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 +479 val_479 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +5 val_5 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +69 val_69 +70 val_70 +72 val_72 +74 val_74 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +92 val_92 +95 val_95 +96 val_96 +97 val_97 +98 val_98 +PREHOOK: query: explain select * from src intersect distinct select * from src intersect distinct select * from src intersect distinct select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src intersect distinct select * from src intersect distinct select * from src intersect distinct select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator 
Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num 
rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col2 = 4) (type: boolean) + Statistics: Num rows: 1 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + 
Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src intersect distinct select * from src intersect distinct select * from src intersect distinct select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src intersect distinct select * from src intersect distinct select * from src intersect distinct select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +119 val_119 +12 val_12 +120 val_120 +125 val_125 +126 val_126 +128 val_128 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +136 val_136 +137 val_137 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +149 val_149 +15 val_15 +150 val_150 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +165 val_165 +166 val_166 +167 val_167 +168 val_168 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +174 val_174 +175 val_175 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +192 val_192 +193 val_193 +194 val_194 +195 val_195 +196 val_196 +197 val_197 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +205 val_205 +207 val_207 +208 val_208 +209 val_209 +213 val_213 +214 val_214 +216 val_216 +217 val_217 +218 val_218 +219 val_219 +221 val_221 +222 val_222 +223 val_223 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +230 val_230 +233 val_233 +235 val_235 +237 val_237 +238 val_238 +239 val_239 +24 val_24 +241 val_241 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +278 val_278 +28 val_28 +280 val_280 +281 val_281 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +308 val_308 +309 val_309 +310 val_310 +311 val_311 +315 val_315 +316 val_316 +317 val_317 +318 val_318 +321 val_321 +322 val_322 +323 val_323 +325 val_325 +327 val_327 +33 val_33 +331 val_331 +332 val_332 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +344 val_344 +345 val_345 +348 val_348 +35 val_35 +351 val_351 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +368 val_368 +369 val_369 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +396 val_396 +397 val_397 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +402 val_402 +403 val_403 +404 val_404 +406 val_406 +407 val_407 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +414 val_414 +417 
val_417 +418 val_418 +419 val_419 +42 val_42 +421 val_421 +424 val_424 +427 val_427 +429 val_429 +43 val_43 +430 val_430 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +439 val_439 +44 val_44 +443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +459 val_459 +460 val_460 +462 val_462 +463 val_463 +466 val_466 +467 val_467 +468 val_468 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 +479 val_479 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +5 val_5 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +69 val_69 +70 val_70 +72 val_72 +74 val_74 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +92 val_92 +95 val_95 +96 val_96 +97 val_97 +98 val_98 +PREHOOK: query: explain select value from a group by value intersect distinct select key from b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from a group by value intersect distinct select key from b group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: int) + outputColumnNames: value + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 = 2) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select value from a group by value intersect distinct select key from b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select value from a group by value intersect distinct select key from b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +2 diff --git a/ql/src/test/results/clientpositive/llap/intersect_merge.q.out b/ql/src/test/results/clientpositive/llap/intersect_merge.q.out new file mode 100644 index 0000000..40e657d --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/intersect_merge.q.out @@ -0,0 +1,1956 @@ +PREHOOK: query: create table a(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3) +PREHOOK: type: QUERY +PREHOOK: Input: 
default@values__tmp__table__1 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: a.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: create table b(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table b values (1,2),(2,3) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values (1,2),(2,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: b.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain select * from b intersect distinct select * from a intersect distinct select * from b intersect distinct select * from a intersect distinct select * from b +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from b intersect distinct select * from a intersect distinct select * from b intersect distinct select * from a intersect distinct select * from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 11 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 7
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 9
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 10
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 12
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 = 5) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 8
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Union 3
+            Vertex: Union 3
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
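The plan above makes the rewrite strategy for INTERSECT DISTINCT visible: every branch is grouped on all projected columns with a count(1), the grouped branches feed a single Union vertex, and a second aggregation keeps only the keys whose branch counter shows up as many times as there are branches (five here, hence the predicate (_col2 = 5)). A minimal hand-written HiveQL equivalent for the two-branch case, assuming only the test tables a and b with int columns key and value, might look like:

    -- sketch: hand-rolled equivalent of
    --   select * from b intersect distinct select * from a
    select key, value
    from (
      select key, value, count(1) as cnt from b group by key, value
      union all
      select key, value, count(1) as cnt from a group by key, value
    ) u
    group by key, value
    having count(cnt) = 2;

The compiled plan counts the per-branch counters (count(_col2)) rather than raw rows, which amounts to the same test because each branch emits at most one row per key after its own GROUP BY.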
+PREHOOK: query: explain (select * from b intersect distinct select * from a) intersect distinct (select * from b intersect distinct select * from a)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain (select * from b intersect distinct select * from a) intersect distinct (select * from b intersect distinct select * from a)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 4 <- Union 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 7
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 9
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 10
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 = 4) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 8
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Union 3
+            Vertex: Union 3
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
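Note that the parenthesized query compiles to a single Union with four inputs and the predicate (_col2 = 4): adjacent INTERSECT DISTINCT operators are merged into one n-ary intersect before the count rewrite is applied, which is what the intersect_merge.q test added by this patch exercises. A sketch of the two forms the optimizer is expected to treat identically:

    -- both should plan as one 4-branch union with a count-equals-4 filter
    (select * from b intersect distinct select * from a)
      intersect distinct
    (select * from b intersect distinct select * from a);

    select * from b intersect distinct select * from a
      intersect distinct select * from b intersect distinct select * from a;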
+PREHOOK: query: explain select * from b intersect distinct (select * from a intersect distinct (select * from b intersect distinct (select * from a intersect distinct select * from b)))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from b intersect distinct (select * from a intersect distinct (select * from b intersect distinct (select * from a intersect distinct select * from b)))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 4 <- Union 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 11
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 7
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 9
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 10
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 12
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 = 5) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 8
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Union 3
+            Vertex: Union 3
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain (((select * from b intersect distinct select * from a) intersect distinct select * from b) intersect distinct select * from a) intersect distinct select * from b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain (((select * from b intersect distinct select * from a) intersect distinct select * from b) intersect distinct select * from a) intersect distinct select * from b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 4 <- Union 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 11
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 7
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 9
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 10
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 12
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 = 5) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 8
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Union 3
+            Vertex: Union 3
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
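The right-deep nesting above and the left-deep nesting just shown compile to the same shape as the flat five-way query: five Map vertices feeding one Union, with the membership filter (_col2 = 5). Since INTERSECT DISTINCT is associative, the merge appears to be insensitive to parenthesization, and the mixed form planned next behaves the same way. Under that assumption, every variant should return identical results, e.g. the flat form:

    -- sketch: every parenthesization of the 5-way intersect distinct
    -- should agree with this flat form
    select count(*) from
      (select * from b intersect distinct select * from a
       intersect distinct select * from b
       intersect distinct select * from a
       intersect distinct select * from b) t;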
+PREHOOK: query: explain select * from b intersect distinct (select * from a intersect distinct select * from b) intersect distinct select * from a intersect distinct select * from b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from b intersect distinct (select * from a intersect distinct select * from b) intersect distinct select * from a intersect distinct select * from b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 4 <- Union 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 11
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 7
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 9
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 10
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 12
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 = 5) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 8
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Union 3
+            Vertex: Union 3
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select * from b intersect distinct (select * from a intersect all select * from b)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from b intersect distinct (select * from a intersect all select * from b)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 4 <- Union 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 7
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 = 3) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Reducer 8
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
+        Union 3
+            Vertex: Union 3
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
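The plan just above shows how the two flavors interact: because the outer INTERSECT DISTINCT discards multiplicities, the inner INTERSECT ALL can be demoted, and the whole query collapses into a single three-branch INTERSECT DISTINCT (hence the predicate (_col2 = 3) over only three Map vertices). Under that reasoning the following pair should be equivalent, which is presumably why they plan identically:

    -- sketch: these two should produce the same plan and the same rows
    select * from b intersect distinct (select * from a intersect all select * from b);
    select * from b intersect distinct select * from a intersect distinct select * from b;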
+PREHOOK: query: explain select * from b intersect all (select * from a intersect all select * from b)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from b intersect all (select * from a intersect all select * from b)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 4 <- Union 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 7
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: min(_col2), count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint), _col3 (type: bigint)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), count(VALUE._col1)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col3 = 3) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col2 (type: bigint), _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                    UDTF Operator
+                      Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      function name: UDTFReplicateRows
+                      Select Operator
+                        expressions: col1 (type: int), col2 (type: int)
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: min(_col2), count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint), _col3 (type: bigint)
+        Reducer 8
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: min(_col2), count(_col2)
+                  keys: _col0 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint), _col3 (type: bigint)
+        Union 3
+            Vertex: Union 3
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
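INTERSECT ALL, planned above, also merges into one three-branch operator but must preserve multiplicities: each branch still groups with count(1), the final aggregation computes both min(cnt) and count(cnt) per key, keeps only keys present in all three branches ((_col3 = 3)), and re-expands each survivor min(cnt) times through the replicate_rows UDTF (shown as UDTFReplicateRows, wired up by createUDTFForSetOp in this patch). A hand-written two-branch sketch, assuming replicate_rows takes the copy count first and then the row columns, as the Select before the UDTF Operator suggests:

    -- sketch: b intersect all a, rewritten by hand
    select replicate_rows(t.min_cnt, t.key, t.value) as (key, value)
    from (
      select key, value, min(cnt) as min_cnt
      from (
        select key, value, count(1) as cnt from b group by key, value
        union all
        select key, value, count(1) as cnt from a group by key, value
      ) u
      group by key, value
      having count(cnt) = 2
    ) t;

The last plan in this file, for intersect all over an inner intersect distinct, does not merge: the inner distinct is evaluated first on its own Union (note the separate Union 7 and the count(1) in mode: complete inside Reducer 8) before feeding the outer all-rewrite.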
count(1) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col2), count(_col2) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, 
_col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col3 = 2) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: bigint), _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + function name: UDTFReplicateRows + Select Operator + expressions: col1 (type: int), col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 = 2) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: int), _col1 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col2), count(_col2) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Union 3 + Vertex: Union 3 + Union 7 + Vertex: Union 7 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +